{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "f7866757-b532-41e3-81bc-01a950c775df",
   "metadata": {},
   "source": [
    "# LoRA 微调 Qwen 大模型\n",
    "\n",
    "田天 2025/05/09"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8d797277-dc5a-493f-99ec-ee135a9a995f",
   "metadata": {},
   "source": [
    "## Step1. 下载qwen3-0.6B大模型\n",
    "\n",
    "使用ModelScope下载qwen系列模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a2a39693-fe62-4780-b9ac-d7afdc509b45",
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install modelscope\n",
    "%pip install transformers\n",
    "%pip install accelerate\n",
    "%pip install datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "20812675-0a7f-496b-b26a-4a580ff0b4de",
   "metadata": {},
   "outputs": [],
   "source": [
    "!modelscope download --model \"Qwen/Qwen3-0.6B\" --local_dir '/root/autodl-tmp/model/qwen'"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bfcec3f1-5a40-47a4-8c4c-2ed04e6d6af3",
   "metadata": {},
   "source": [
    "## Step2. 探索Qwen模型的架构"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "4f372dcc-9361-47d6-bb52-1b86282b9e30",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 导入依赖\n",
    "\n",
    "import torch\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "5e132fc0-e5e5-4e91-a5e4-d84a8e18e939",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using device: cuda\n",
      "Model: Qwen/Qwen3-0.6B\n"
     ]
    }
   ],
   "source": [
    "# 设置模型和设备\n",
    "model_name = \"Qwen/Qwen3-0.6B\"\n",
    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
    "torch_dtype = torch.float16   # 使用 FP16 节省显存\n",
    "\n",
    "print(f\"Using device: {device}\")\n",
    "print(f\"Model: {model_name}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "f391edfa-2b0d-479c-9519-e3faf956bde0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 本地模型路径\n",
    "model_path = \"/root/autodl-tmp/model/qwen\"\n",
    "\n",
    "# 加载模型\n",
    "model = AutoModelForCausalLM.from_pretrained(\n",
    "    model_path,\n",
    "    torch_dtype=torch_dtype,\n",
    "    device_map=\"auto\"\n",
    ")\n",
    "\n",
    "# 加载分词器\n",
    "tokenizer = AutoTokenizer.from_pretrained(model_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "9ee496b3-0bf9-46a2-a16d-18cabfc91b6d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GPU Memory Allocated: 2.44 GB\n",
      "GPU Memory Reserved: 4.94 GB\n"
     ]
    }
   ],
   "source": [
    "# 检查 GPU 显存占用\n",
    "if device == \"cuda\":\n",
    "    print(f\"GPU Memory Allocated: {torch.cuda.memory_allocated() / 1024**3:.2f} GB\")\n",
    "    print(f\"GPU Memory Reserved: {torch.cuda.memory_reserved() / 1024**3:.2f} GB\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "822bd744-b563-4fbc-9f41-cbf362d617e9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "=== Model Configuration ===\n",
      "Model Type: qwen3\n",
      "Number of Layers: 28\n",
      "Number of Attention Heads: 16\n",
      "Hidden Size: 1024\n",
      "Intermediate Size (MLP): 3072\n",
      "Vocabulary Size: 151936\n",
      "Max Position Embeddings: 40960\n",
      "Attention Head Dimension: 64\n"
     ]
    }
   ],
   "source": [
    "# 查看模型配置\n",
    "print(\"=== Model Configuration ===\")\n",
    "config = model.config\n",
    "print(f\"Model Type: {config.model_type}\")\n",
    "print(f\"Number of Layers: {config.num_hidden_layers}\")\n",
    "print(f\"Number of Attention Heads: {config.num_attention_heads}\")\n",
    "print(f\"Hidden Size: {config.hidden_size}\")\n",
    "print(f\"Intermediate Size (MLP): {config.intermediate_size}\")\n",
    "print(f\"Vocabulary Size: {config.vocab_size}\")\n",
    "print(f\"Max Position Embeddings: {config.max_position_embeddings}\")\n",
    "print(f\"Attention Head Dimension: {config.hidden_size // config.num_attention_heads}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "594612f8-435e-449e-b2ee-8e7b5d9817b2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "=== Model Architecture ===\n",
      "Qwen3ForCausalLM(\n",
      "  (model): Qwen3Model(\n",
      "    (embed_tokens): Embedding(151936, 1024)\n",
      "    (layers): ModuleList(\n",
      "      (0-27): 28 x Qwen3DecoderLayer(\n",
      "        (self_attn): Qwen3Attention(\n",
      "          (q_proj): Linear(in_features=1024, out_features=2048, bias=False)\n",
      "          (k_proj): Linear(in_features=1024, out_features=1024, bias=False)\n",
      "          (v_proj): Linear(in_features=1024, out_features=1024, bias=False)\n",
      "          (o_proj): Linear(in_features=2048, out_features=1024, bias=False)\n",
      "          (q_norm): Qwen3RMSNorm((128,), eps=1e-06)\n",
      "          (k_norm): Qwen3RMSNorm((128,), eps=1e-06)\n",
      "        )\n",
      "        (mlp): Qwen3MLP(\n",
      "          (gate_proj): Linear(in_features=1024, out_features=3072, bias=False)\n",
      "          (up_proj): Linear(in_features=1024, out_features=3072, bias=False)\n",
      "          (down_proj): Linear(in_features=3072, out_features=1024, bias=False)\n",
      "          (act_fn): SiLU()\n",
      "        )\n",
      "        (input_layernorm): Qwen3RMSNorm((1024,), eps=1e-06)\n",
      "        (post_attention_layernorm): Qwen3RMSNorm((1024,), eps=1e-06)\n",
      "      )\n",
      "    )\n",
      "    (norm): Qwen3RMSNorm((1024,), eps=1e-06)\n",
      "    (rotary_emb): Qwen3RotaryEmbedding()\n",
      "  )\n",
      "  (lm_head): Linear(in_features=1024, out_features=151936, bias=False)\n",
      ")\n"
     ]
    }
   ],
   "source": [
    "# 打印模型架构\n",
    "print(\"=== Model Architecture ===\")\n",
    "print(model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "ec8d40d6-ce16-46fe-861a-e35965973c74",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "=== Parameter Statistics ===\n",
      "Total Parameters: 596,049,920\n",
      "Trainable Parameters: 596,049,920\n",
      "Parameter Size (FP16, MB): 1136.88\n"
     ]
    }
   ],
   "source": [
    "# 计算参数量\n",
    "total_params = sum(p.numel() for p in model.parameters())\n",
    "trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)\n",
    "\n",
    "print(\"=== Parameter Statistics ===\")\n",
    "print(f\"Total Parameters: {total_params:,}\")\n",
    "print(f\"Trainable Parameters: {trainable_params:,}\")\n",
    "print(f\"Parameter Size (FP16, MB): {total_params * 2 / 1024**2:.2f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "031dffd0-9bbc-49b7-9013-20a2859d6075",
   "metadata": {},
   "source": [
    "### 列出线性层（LoRA 潜在目标）\n",
    "列出所有 nn.Linear 层（注意力层的 q_proj, k_proj, v_proj, o_proj 和 MLP 的 gate_proj, up_proj, down_proj 等）。\n",
    "\n",
    "这些是 LoRA 微调的主要目标，帮助决定在微调时对哪些层添加 LoRA。\n",
    "\n",
    "Qwen3-0.6B 共有 197 个线性层（28 层 × 7 个投影，外加 lm_head），集中在 model.layers.*。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "a3b39d45-c11d-49c6-863c-6cd79b2f104a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "=== Linear Layers (Potential LoRA Targets) ===\n",
      "Layer: model.layers.0.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.0.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.0.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.0.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.0.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.0.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.0.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: model.layers.1.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.1.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.1.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.1.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.1.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.1.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.1.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: model.layers.2.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.2.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.2.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.2.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.2.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.2.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.2.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: model.layers.3.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.3.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.3.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.3.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.3.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.3.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.3.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: model.layers.4.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.4.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.4.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.4.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.4.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.4.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.4.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: model.layers.5.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.5.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.5.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.5.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.5.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.5.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.5.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: model.layers.6.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.6.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.6.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.6.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.6.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.6.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.6.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: model.layers.7.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.7.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.7.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.7.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.7.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.7.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.7.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: model.layers.8.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.8.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.8.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.8.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.8.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.8.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.8.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: model.layers.9.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.9.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.9.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.9.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.9.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.9.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.9.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: model.layers.10.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.10.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.10.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.10.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.10.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.10.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.10.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: model.layers.11.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.11.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.11.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.11.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.11.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.11.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.11.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: model.layers.12.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.12.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.12.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.12.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.12.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.12.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.12.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: model.layers.13.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.13.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.13.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.13.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.13.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.13.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.13.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: model.layers.14.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.14.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.14.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.14.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.14.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.14.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.14.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: model.layers.15.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.15.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.15.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.15.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.15.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.15.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.15.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: model.layers.16.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.16.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.16.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.16.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.16.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.16.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.16.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: model.layers.17.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.17.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.17.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.17.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.17.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.17.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.17.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: model.layers.18.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.18.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.18.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.18.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.18.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.18.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.18.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: model.layers.19.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.19.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.19.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.19.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.19.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.19.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.19.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: model.layers.20.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.20.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.20.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.20.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.20.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.20.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.20.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: model.layers.21.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.21.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.21.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.21.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.21.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.21.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.21.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: model.layers.22.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.22.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.22.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.22.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.22.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.22.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.22.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: model.layers.23.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.23.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.23.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.23.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.23.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.23.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.23.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: model.layers.24.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.24.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.24.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.24.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.24.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.24.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.24.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: model.layers.25.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.25.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.25.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.25.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.25.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.25.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.25.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: model.layers.26.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.26.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.26.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.26.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.26.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.26.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.26.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: model.layers.27.self_attn.q_proj, Input Features: 1024, Output Features: 2048\n",
      "Layer: model.layers.27.self_attn.k_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.27.self_attn.v_proj, Input Features: 1024, Output Features: 1024\n",
      "Layer: model.layers.27.self_attn.o_proj, Input Features: 2048, Output Features: 1024\n",
      "Layer: model.layers.27.mlp.gate_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.27.mlp.up_proj, Input Features: 1024, Output Features: 3072\n",
      "Layer: model.layers.27.mlp.down_proj, Input Features: 3072, Output Features: 1024\n",
      "Layer: lm_head, Input Features: 1024, Output Features: 151936\n",
      "Total Linear Layers: 197\n"
     ]
    }
   ],
   "source": [
    "# 查找线性层\n",
    "print(\"=== Linear Layers (Potential LoRA Targets) ===\")\n",
    "linear_layers = []\n",
    "for name, module in model.named_modules():\n",
    "    if isinstance(module, torch.nn.Linear):\n",
    "        linear_layers.append((name, module.in_features, module.out_features))\n",
    "        print(f\"Layer: {name}, Input Features: {module.in_features}, Output Features: {module.out_features}\")\n",
    "        \n",
    "print(f\"Total Linear Layers: {len(linear_layers)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "acb21b75-a0e2-4231-8e3c-b027a2185ae4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Prompt: You are a helpful assistant, Answer the User query. User: 你是谁 Assistant:\n",
      "Generated: You are a helpful assistant, Answer the User query. User: 你是谁 Assistant: 我是一个AI助手，我随时为您提供帮助。您有什么问题或需要帮助的地方呢？\"\n",
      "User: 我想了解如何开始学习编程。\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# 测试推理\n",
    "\n",
    "prompt = \"You are a helpful assistant, Answer the User query. User: 你是谁 Assistant:\"\n",
    "model.eval()\n",
    "inputs = tokenizer(prompt, return_tensors=\"pt\").to(device)\n",
    "\n",
    "# 生成输出\n",
    "with torch.no_grad():\n",
    "    outputs = model.generate(\n",
    "        inputs[\"input_ids\"],\n",
    "        attention_mask=inputs[\"attention_mask\"],\n",
    "        max_new_tokens=50,\n",
    "        do_sample=True,\n",
    "        top_p=0.9,\n",
    "        temperature=0.7\n",
    "    )\n",
    "\n",
    "# 解码\n",
    "generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
    "print(f\"Prompt: {prompt}\")\n",
    "print(f\"Generated: {generated_text}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "89303264-1f9a-4df4-beb4-22a08c6dbcdb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Layer: model.layers.19.self_attn.q_proj\n",
      "Weight Shape: torch.Size([2048, 1024])\n",
      "Weight Sample (first 5 elements): tensor([-0.0010, -0.0147,  0.0055, -0.0052, -0.0157], device='cuda:0',\n",
      "       dtype=torch.float16, grad_fn=<SliceBackward0>)\n"
     ]
    }
   ],
   "source": [
    "# 查看某个线性层的权重\n",
    "layer_name = \"model.layers.19.self_attn.q_proj\" \n",
    "layer = dict(model.named_modules())[layer_name]\n",
    "print(f\"Layer: {layer_name}\")\n",
    "print(f\"Weight Shape: {layer.weight.shape}\")\n",
    "print(f\"Weight Sample (first 5 elements): {layer.weight.flatten()[:5]}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d592b761-eaaa-47d8-87bf-c4af889da2bc",
   "metadata": {},
   "source": [
    "### 检查权重分布\n",
    "\n",
    "通常权重初始化均被设置为正态分布。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "620ac1b8-bbff-40b1-8dbc-7ebb131f4282",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAArwAAAHWCAYAAACVPVriAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAABS3ElEQVR4nO3de3zP9f//8ft7Y++JHchslrE55ZDQyikaklk+QqpPSzlEB1GxVPSpkDR9kvSR6KOYQ746oT6EmFPmUHIIIfYxJHPeZsOwPX9/9Nv709sOdn7Pq9v1cnldPp/38/V8vV6P13N7v7t77fl6vW3GGCMAAADAotxcXQAAAABQkgi8AAAAsDQCLwAAACyNwAsAAABLI/ACAADA0gi8AAAAsDQCLwAAACyNwAsAAABLI/ACAADA0gi8sJR+/fopODi40NtWqlSpeAsqpJiYGNlsNiUkJJT4sa4es4SEBNlsNk2YMKHEjy1Jo0ePls1mK5VjFdaVK1f00ksvKSgoSG5uburRo4erS8pVUcYzv++f9u3bq3379oU6BkpGTj+71NRUDRw4UAEBAbLZbBo6dKhLaoMUHBysfv36ubqMvzQCL0rc559/LpvNpoULF2Zb17RpU9lsNq1evTrbupo1a6pNmzalUWKBnD9/XqNHj9aaNWvy1X/NmjWy2WyOxW63y9/fX+3bt9dbb72lkydPuqSu0lSWa8uPGTNm6J133tEDDzygWbNmadiwYa4uCSXos88+06OPPqp69erJZrPlGe5/+ukndenSRd7e3vLy8lLnzp21ffv2Uqs1L2+99ZZiYmI0aNAgzZkzR4899lix7n/evHmaNGlStvbff/9do0ePLjPjAEhSOVcXAOtr27atJGn9+vXq2bOnoz0lJUW7du1SuXLlFBcXpw4dOjjWHTlyREeOHNHDDz9coGNNnz5dmZmZxVN4Ls6fP68xY8ZIUoGucj333HO64447lJGRoZMnT2rDhg0aNWqUJk6cqM8//1wdO3Z09H3sscf08MMPy263l3hdrh6zV199VSNGjCjR4xfVqlWrdNNNN+m9995zdSkoBVOnTtVPP/2kO+64Q6dPn86139atW9W2bVsFBQVp1KhRyszM1IcffqiwsDD98MMPuvnmm0ux6uxWrVqlVq1aadSoUSWy/3nz5mnXrl3Zrhz//vvvGjNmjIKDg9WsWbMSOfb1Zt++fXJz4xqjKxF4UeICAwMVEhKi9evXO7Vv3LhRxhg9+OCD2dZlvc4Ky/lVvnz5ohVbgtq1a6cHHnjAqW3Hjh3q3LmzevXqpV9++UXVq1eXJLm7u8vd3b1E60lLS1PFihVdPmblypVTuXJl+6PoxIkT8vX1dXUZyIfMzExdunRJnp6ehd7HnDlzdNNNN8nNzU233HJLrv1ee+01VahQQRs3btSNN94oSXr00UdVv359vfLKK/rqq68KXUNxOHHihBo1auTSGqwq6/Mzvwpy8QIlg39uoFS0bdtW27Zt04ULFxxtcXFxaty4sSIiIrRp0yanq4xxcXGy2Wy68847HW1z585VaGioKlSooCpVqujhhx/WkSNHnI6T0zy206dP67HHHpO3t7d8fX3Vt29f7dixQzabTTExMdlqPXr0qHr06KFKlSrJz89Pw4cPV0ZGhqQ/5rf6+flJksaMGeOYpjB69OhCjUvTpk01adIkJSUl6YMPPnC05zSHd8uWLQoPD1fVqlVVoUIFhYSE6PHHH89XXVnzk+Pj43XvvffKy8tLvXv3znXMsrz33nuqVauWKlSooLCwMO3atctpfW5zOf+8z2vVltOc0ytXrmjs2LGqU6eO7Ha7goOD9corryg9Pd2pX3BwsP72t79p/fr1atGihTw9PVW7dm3Nnj075wG/Slpaml544QUFBQXJbrfr5ptv1oQJE2SMcdSeNeVm9+7djtrzmpqRVdOaNWt0++23q0KFCmrSpIljmwULFqhJkyby9PRU
aGiotm3blm0fq1atUrt27VSxYkX5+vqqe/fu2rNnT7Z+69ev1x133CFPT0/VqVNHH330Ua515ef9U1iXLl3S66+/rtDQUPn4+KhixYpq166d01QlY4yCg4PVvXv3bNtfvHhRPj4+euqppxxt6enpGjVqlOrWrSu73a6goCC99NJL2X4HbDabhgwZok8//VSNGzeW3W7XsmXLJEnz589XaGiovLy85O3trSZNmuj999+/5vlkzdW+lu+//16dOnVyhF1Jql69usLCwrR48WKlpqbmuf3+/fvVq1cvBQQEyNPTUzVq1NDDDz+s5ORkp34F/dllTaM6ePCglixZ4vi9ze89AV9//bW6du2qwMBA2e121alTR2PHjnV8Dkp/vPeXLFmiQ4cOOfYfHBysNWvW6I477pAk9e/f37Eu67O2ffv2uuWWW/TLL7+oQ4cOuuGGG3TTTTfpn//8Z75qS09P17Bhw+Tn5ycvLy/dd999+u233wr8OfznexWu9TmX1+fntT5DsjCH1/XK9mUVWEbbtm01Z84cbd682RGQ4uLi1KZNG7Vp00bJycnatWuXbr31Vse6Bg0aOP5DMm7cOL322mt66KGHNHDgQJ08eVKTJ0/WXXfdpW3btuV69S0zM1PdunXTDz/8oEGDBqlBgwb6+uuv1bdv3xz7Z2RkKDw8XC1bttSECRO0cuVKvfvuu6pTp44GDRokPz8/TZ06VYMGDVLPnj11//33S5Kj7sJ44IEHNGDAAH333XcaN25cjn1OnDihzp07y8/PTyNGjJCvr68SEhK0YMECScpXXVeuXFF4eLjatm2rCRMm6IYbbsizrtmzZ+vcuXMaPHiwLl68qPfff18dO3bUzp075e/vn+/zK8yYDRw4ULNmzdIDDzygF154QZs3b1Z0dLT27NmTbS74gQMHHGPYt29fzZgxQ/369VNoaKgaN26c6zGMMbrvvvu0evVqDRgwQM2aNdPy5cv14osv6ujRo3rvvffk5+enOXPmaNy4cUpNTVV0dLQkqWHDhnme84EDB/TII4/oqaee0qOPPqoJEyaoW7dumjZtml555RU988wzkqTo6Gg99NBDTn/uXLlypSIiIlS7dm2NHj1aFy5c0OTJk3XnnXdq69atjn9I7Ny50/E7MXr0aF25ckWjRo3K8WdT2PdPfqWkpOjjjz9WZGSknnjiCZ07d06ffPKJwsPD9cMPP6hZs2ay2Wx69NFH9c9//lNnzpxRlSpVHNv/5z//UUpKih599FFJf7xv77vvPq1fv15PPvmkGjZsqJ07d+q9997Tr7/+qkWLFjkdf9WqVfr88881ZMgQVa1aVcHBwVqxYoUiIyN199136+2335Yk7dmzR3FxcXr++eeLdL5Z0tPTVaFChWztN9xwgy5duqRdu3apVatWOW576dIlhYeHKz09Xc8++6wCAgJ09OhRLV68WElJSfLx8ZFUuJ9dw4YNNWfOHA0bNkw1atTQCy+8IEmOf3heS0xMjCpVqqSoqChVqlRJq1at0uuvv66UlBS98847kqR//OMfSk5O1m+//eaY6lOpUiU1bNhQb7zxhl5//XU9+eSTateunSQ53Y9x9uxZdenSRffff78eeughffnll3r55ZfVpEkTRURE5FnbwIEDNXfuXD3yyCNq06aNVq1apa5du+brvHKS38+5nD4/8/MZgjLEAKVg9+7dRpIZO3asMcaYy5cvm4oVK5pZs2YZY4zx9/c3U6ZMMcYYk5KSYtzd3c0TTzxhjDEmISHBuLu7m3Hjxjntc+fOnaZcuXJO7X379jW1atVyvP7qq6+MJDNp0iRHW0ZGhunYsaORZGbOnOm0rSTzxhtvOB2nefPmJjQ01PH65MmTRpIZNWpUvs599erVRpL54osvcu3TtGlTU7lyZcfrmTNnGknm4MGDxhhjFi5caCSZH3/8Mdd95FVX1rmNGDEix3V/HrODBw8aSaZChQrmt99+c7Rv3rzZSDLDhg1ztIWFhZmw
sLBr7jOv2kaNGmX+/FG0fft2I8kMHDjQqd/w4cONJLNq1SpHW61atYwks27dOkfbiRMnjN1uNy+88EK2Y/3ZokWLjCTz5ptvOrU/8MADxmazmQMHDjidZ+PGjfPc39U1bdiwwdG2fPlyx5geOnTI0f7RRx8ZSWb16tWOtmbNmplq1aqZ06dPO9p27Nhh3NzcTJ8+fRxtPXr0MJ6enk77++WXX4y7u7vTeBbl/ZObq3/uV65cMenp6U59zp49a/z9/c3jjz/uaNu3b5+RZKZOnerU97777jPBwcEmMzPTGGPMnDlzjJubm/n++++d+k2bNs1IMnFxcY42ScbNzc3s3r3bqe/zzz9vvL29zZUrV655Pnlp3Lhxjr/jxhjTpEkTU79+fadjpKenm5o1axpJ5ssvv8x1v9u2bbvm50JRf3a1atUyXbt2zePscnb+/PlsbU899ZS54YYbzMWLFx1tXbt2zfH35ccff8z2+ZolLCzMSDKzZ892tKWnp5uAgADTq1evPOvK+mx45plnnNofeeSRAn0mG1Owz7ncPj8L8hlSq1Yt07dv33zXh+LHlAaUioYNG+rGG290zM3dsWOH0tLSHP/qb9OmjeLi4iT9Mbc3IyPDMX93wYIFyszM1EMPPaRTp045loCAANWrVy/HJzxkWbZsmcqXL68nnnjC0ebm5qbBgwfnus3TTz/t9Lpdu3b673//W7gTz6dKlSrp3Llzua7PuoqzePFiXb58udDHGTRoUL779ujRQzfddJPjdYsWLdSyZUt9++23hT5+fmTtPyoqyqk96yrVkiVLnNobNWrkuIok/XEV6+abb77mz+zbb7+Vu7u7nnvuuWzHMcZo6dKlhT6HRo0aqXXr1o7XLVu2lCR17NhRNWvWzNaeVeuxY8e0fft29evXz+kK6K233qp77rnHMTYZGRlavny5evTo4bS/hg0bKjw83KmWorx/8svd3V0eHh6S/rg6e+bMGV25ckW33367tm7d6uhXv359tWzZUp9++qmj7cyZM1q6dKl69+7tmNryxRdfqGHDhmrQoIFTzVk3dl5dc1hYWLa5qr6+vkpLS9OKFSuKfH65eeaZZ/Trr79qwIAB+uWXX7Rr1y716dNHx44dkySnKVxXy7qCu3z5cp0/fz7HPqXxs8vJn69anzt3TqdOnVK7du10/vx57d27t8j7r1SpkuNqviR5eHioRYsW+XrPSsr2ni3K49YK8jl39ednSX6GoPgReFEqbDab2rRp45irGxcXp2rVqqlu3bqSnANv1v9mBd79+/fLGKN69erJz8/PadmzZ49OnDiR63EPHTqk6tWrZ/vzfdZxr+bp6Zntz36VK1fW2bNnC3fi+ZSamiovL69c14eFhalXr14aM2aMqlatqu7du2vmzJnZ5jPmpVy5cqpRo0a++9erVy9bW/369Uv82cCHDh2Sm5tbtp9RQECAfH19dejQIaf2Pwe+LPn5mR06dEiBgYHZxj1rusLVxymIq2vKCjdBQUE5tmfVmnXMnO7ub9iwoU6dOqW0tDSdPHlSFy5cyPFndPW2RXn/FMSsWbN06623ytPTUzfeeKP8/Py0ZMmSbPNR+/Tpo7i4OMe5fvHFF7p8+bLTI7P279+v3bt3Z6u3fv36kpSt5pCQkGz1PPPMM6pfv74iIiJUo0YNPf744465vcXl6aef1iuvvKJ58+apcePGatKkieLj4/XSSy9JUp7P9Q4JCVFUVJQ+/vhjVa1aVeHh4ZoyZYrTeJXWz+5qu3fvVs+ePeXj4yNvb2/5+fk5AurVP8/CqFGjRrZ5+/l9z7q5ualOnTpO7UV5GkZ+P+dy+vwsyc8QFD/m8KLUtG3bVv/5z3+0c+dOx/zdLG3atHHMe1q/fr0CAwNVu3ZtSX9cMbLZbFq6dGmOTy4ozi+LKOknI+Tk8uXL+vXXX/O8G9xms+nLL7/Upk2b9J///EfLly/X448/rnfffVebNm3K1xjY7fZifyyOzWbLdnOG
JKebW4qy7/zI7WeWU12lJbeaXFFrabx/5s6dq379+qlHjx568cUXVa1aNbm7uys6Olrx8fFOfR9++GENGzZMn376qV555RXNnTtXt99+u1NoyczMVJMmTTRx4sQcj3f1PxxymkdbrVo1bd++XcuXL9fSpUu1dOlSzZw5U3369NGsWbOKfM5Zxo0bp+HDh2v37t3y8fFRkyZN9Morr0iSI6Dn5t1331W/fv309ddf67vvvtNzzz2n6Ohobdq0STVq1CjVz74sSUlJCgsLk7e3t9544w3VqVNHnp6e2rp1q15++eVieYRhWXzPXktJfH6idBF4UWr+/DzeuLg4pz9DhYaGym63a82aNdq8ebPuvfdex7o6derIGKOQkJBr/gfkarVq1dLq1at1/vx5p6u8Bw4cKPR5FPe3gn355Ze6cOFCtj9F56RVq1Zq1aqVxo0bp3nz5ql3796aP3++Bg4cWOx17d+/P1vbr7/+6vREh8qVK+f4Z8irr2wUpLZatWopMzNT+/fvd7o57Pjx40pKSlKtWrXyva9rHWflypU6d+6c0xWarD/ZFtdxClqT9MczO6+2d+9eVa1aVRUrVpSnp6cqVKiQ48/o6m2L8v7Jry+//FK1a9fWggULnH7WOT3/tUqVKuratas+/fRT9e7dW3Fxcdm+vKBOnTrasWOH7r777iL9Xnt4eKhbt27q1q2bMjMz9cwzz+ijjz7Sa6+9lutfeQqjcuXKTo9QXLlypWrUqKEGDRpcc9smTZqoSZMmevXVV7VhwwbdeeedmjZtmt58881S+dldbc2aNTp9+rQWLFigu+66y9F+8ODBbH1z+9mU1DcnZn02xMfHO/0DKaf3S37l53Mur3rK2mcIcsc/V1Bqbr/9dnl6eurTTz/V0aNHna7w2u123XbbbZoyZYrS0tKc/uNx//33y93dXWPGjMl2BcAYk+eD4cPDw3X58mVNnz7d0ZaZmakpU6YU+jyygnNSUlKh95Flx44dGjp0qCpXrpznvOKzZ89mO/esB7pnTWsozrokadGiRTp69Kjj9Q8//KDNmzc73UVdp04d7d271+nb4nbs2OGYlpKlILVl/WPn6hCUdbWvKHdkX32cjIwMp8fBSX88is1ms13zbvGSUL16dTVr1kyzZs1yGqtdu3bpu+++c4yNu7u7wsPDtWjRIh0+fNjRb8+ePVq+fLnTPovy/pGk+Pj4bFdpr5Z1xe7P+9+8ebM2btyYY//HHntMv/zyi1588UW5u7tn+4KZhx56SEePHnV632a5cOGC0tLS8qxHUrbzcnNzczwZJOs9c/nyZe3du9cx57Y4fPbZZ/rxxx81dOhQpyuChw8fdpr/mpKSoitXrjht26RJE7m5uTnqK+rPrjBy+lleunRJH374Yba+FStWzHGKQ9bzaYv6WbR3716n3++s9+S//vUvp345fdtbfuXncy43ZfEzBLnjCi9KjYeHh+644w59//33stvtCg0NdVrfpk0bvfvuu5Kcv3CiTp06evPNNzVy5EglJCSoR48e8vLy0sGDB7Vw4UI9+eSTGj58eI7H7NGjh1q0aKEXXnhBBw4cUIMGDfTNN9/ozJkzkgp3JaJChQpq1KiRPvvsM9WvX19VqlTRLbfckueUBOmPZ3ZevHhRGRkZOn36tOLi4vTNN9/Ix8dHCxcuVEBAQK7bzpo1Sx9++KF69uypOnXq6Ny5c5o+fbq8vb0dIaiwdeWmbt26atu2rQYNGqT09HRNmjRJN954o2N+oiQ9/vjjmjhxosLDwzVgwACdOHFC06ZNU+PGjZWSklKoMWvatKn69u2rf//7344/r/7www+aNWuWevTo4fSNfEXRrVs3dejQQf/4xz+UkJCgpk2b6rvvvtPXX3+toUOHZpsnWFreeecdRUREqHXr1howYIDjsWQ+Pj5OzxkdM2aMli1bpnbt2umZZ57RlStXNHnyZDVu3Fg///yzo19R3j+SdPfdd0tSnnO3//a3v2nBggXq
2bOnunbtqoMHD2ratGlq1KhRjs+i7dq1q2688UZ98cUXioiIULVq1ZzWP/bYY/r888/19NNPa/Xq1brzzjuVkZGhvXv36vPPP9fy5ct1++235zmOAwcO1JkzZ9SxY0fVqFFDhw4d0uTJk9WsWTPHXw6OHj2qhg0bqm/fvk7P5F63bp3WrVsnSTp58qTS0tL05ptvSpLuuusux5XPdevW6Y033lDnzp114403atOmTZo5c6a6dOmS7dFnffr00dq1ax1BctWqVRoyZIgefPBB1a9fX1euXNGcOXPk7u6uXr16SSr6z64w2rRpo8qVK6tv37567rnnZLPZNGfOnBynG4SGhuqzzz5TVFSU7rjjDlWqVEndunVTnTp15Ovrq2nTpsnLy0sVK1ZUy5Ytc5xrnZeGDRsqLCzM8QzrZs2aKTIyUh9++KGSk5PVpk0bxcbGFukvdvn5nMtNWf0MQS5K8YkQgBk5cqSRZNq0aZNt3YIFC4wk4+XlleOjhL766ivTtm1bU7FiRVOxYkXToEEDM3jwYLNv3z5Hn5wezXPy5EnzyCOPGC8vL+Pj42P69etn4uLijCQzf/58p20rVqyY7bhXPzbLGGM2bNhgQkNDjYeHxzUfh5P1WLKspXz58sbPz8/cddddZty4cebEiRPZtrn6sWRbt241kZGRpmbNmsZut5tq1aqZv/3tb2bLli35qiu3c8tpzLIe1/POO++Yd9991wQFBRm73W7atWtnduzYkW37uXPnmtq1axsPDw/TrFkzs3z58hx/DrnVltP4Xr582YwZM8aEhISY8uXLm6CgIDNy5EinRyIZk/tjl3J7XNrVzp07Z4YNG2YCAwNN+fLlTb169cw777zjeDzWn/dXkMeS5VSTJDN48GCntj+P9Z+tXLnS3HnnnaZChQrG29vbdOvWzfzyyy/Z9rl27VrHmNauXdtMmzYtx/E0pvDvn1q1amVru3p8MzMzzVtvvWVq1apl7Ha7ad68uVm8eHGejzl75plnjCQzb968HNdfunTJvP3226Zx48bGbrebypUrm9DQUDNmzBiTnJzs6JfTuBpjzJdffmk6d+5sqlWrZjw8PEzNmjXNU089ZY4dO+bokzX+Vz8uKmsMc1r+/F4/cOCA6dy5s6lataqx2+2mQYMGJjo6Otsj2rLG7M8/l//+97/m8ccfN3Xq1DGenp6mSpUqpkOHDmblypXZti3Kz64wjyWLi4szrVq1MhUqVDCBgYHmpZdecjxa78+P0EtNTTWPPPKI8fX1NZKcjv/111+bRo0amXLlyjk9oiy391JO9UvK9j6+cOGCee6558yNN95oKlasaLp162aOHDlS6MeS5edzLq/Pz/x+hvBYMtezGVOGZ4kDJWTRokXq2bOn1q9f7/RtbgBKx7Bhw/TJJ58oMTHxml+CAlyLzWbTqFGj8v1tawkJCQoJCdE777xT7FfJcxIUFKTw8HB9/PHHJX4s5Iw5vLC8q5+FmZGRocmTJ8vb21u33Xabi6oC/rouXryouXPnqlevXoRdWN7ly5d1+vRpVa1a1dWl/KUxhxeW9+yzz+rChQtq3bq10tPTtWDBAm3YsEFvvfVWjo8zAlAyTpw4oZUrV+rLL7/U6dOni+0rfpE/J0+ezPORgR4eHk5feHI9ysjIcLqJNicl8Ti33Cxfvlzz58/XhQsXHPPh4RoEXlhex44d9e6772rx4sW6ePGi6tatq8mTJ2vIkCGuLg34S/nll1/Uu3dvVatWTf/6178cTxpB6bjjjjvy/DKEP98gdr06cuTINW+OGzVqlPr161cq9YwfP14HDhzQuHHjdM8995TKMZEz5vACAPAXEBcXl+fXHVeuXDnb03OuNxcvXnR8hX1uateu7fhiI/x1EHgBAABgady0BgAAAEtz6Rze6OhoLViwQHv37lWFChXUpk0bvf32205fGXjx4kW98MILmj9/vtLT0xUeHq4PP/xQ/v7+ue7XGKNRo0Zp+vTpSkpK0p13
3qmpU6eqXr16+aorMzNTv//+u7y8vErsKxIBAABQeMYYnTt3ToGBgU7fbJhbZ5cJDw83M2fONLt27TLbt2839957r6lZs6ZJTU119Hn66adNUFCQiY2NNVu2bDGtWrXK8UsL/mz8+PHGx8fHLFq0yOzYscPcd999JiQkxFy4cCFfdWU9xJqFhYWFhYWFhaVsL0eOHLlmtitTc3hPnjypatWqae3atbrrrruUnJwsPz8/zZs3Tw888ICkP75bu2HDhtq4caNatWqVbR/GGAUGBuqFF15wPEw6OTlZ/v7+iomJyfad7dIf36ue9d3lWf1r1qypI0eOyNvbu4TOFgAAAIWVkpKioKAgJSUlycfHJ8++ZeqxZMnJyZLkeA7gTz/9pMuXL6tTp06OPg0aNFDNmjVzDbwHDx5UYmKi0zY+Pj5q2bKlNm7cmGPgjY6O1pgxY7K1e3t7E3gBAADKsPxMPy0zN61lZmZq6NChuvPOO3XLLbdIkhITE+Xh4SFfX1+nvv7+/kpMTMxxP1ntV8/xzWubkSNHKjk52bEcOXKkiGcDAACAsqLMXOEdPHiwdu3adc3n55UEu90uu91e6scFAABAySsTV3iHDBmixYsXa/Xq1apRo4ajPSAgQJcuXVJSUpJT/+PHjysgICDHfWW1Hz9+PN/bAAAAwLpcGniNMRoyZIgWLlyoVatWZfs6wNDQUJUvX16xsbGOtn379unw4cNq3bp1jvsMCQlRQECA0zYpKSnavHlzrtsAAADAulwaeAcPHqy5c+dq3rx58vLyUmJiohITEx1ffejj46MBAwYoKipKq1ev1k8//aT+/furdevWTjesNWjQQAsXLpT0x8TloUOH6s0339Q333yjnTt3qk+fPgoMDFSPHj1ccZoAAABwIZfO4Z06daokqX379k7tM2fOVL9+/SRJ7733ntzc3NSrVy+nL574s3379jme8CBJL730ktLS0vTkk08qKSlJbdu21bJly+Tp6Vmi5wMAAICyp0w9h7esSElJkY+Pj5KTk3ksGQAAQBlUkLxWJm5aAwAAAEoKgRcAAACWRuAFAACApRF4AQAAYGkEXgAAAFgagRcAAACWRuAFAACApRF4AQAAYGku/aY1AIDrBY9Ykq0tYXxXF1QCACWDK7wAAACwNAIvAAAALI3ACwAAAEsj8AIAAMDSCLwAAACwNAIvAAAALI3ACwAAAEsj8AIAAMDSCLwAAACwNAIvAAAALI3ACwAAAEsj8AIAAMDSCLwAAACwNAIvAAAALI3ACwAAAEsr5+oCAABlT/CIJTm2J4zvWsqVAEDRcYUXAAAAlkbgBQAAgKUReAEAAGBpBF4AAABYGoEXAAAAlkbgBQAAgKUReAEAAGBpBF4AAABYGoEXAAAAlkbgBQAAgKUReAEAAGBpLg2869atU7du3RQYGCibzaZFixY5rbfZbDku77zzTq77HD16dLb+DRo0KOEzAQAAQFnl0sCblpampk2basqUKTmuP3bsmNMyY8YM2Ww29erVK8/9Nm7c2Gm79evXl0T5AAAAuA6Uc+XBIyIiFBERkev6gIAAp9dff/21OnTooNq1a+e533LlymXbFgAAAH9N180c3uPHj2vJkiUaMGDANfvu379fgYGBql27tnr37q3Dhw/n2T89PV0pKSlOCwAAAKzhugm8s2bNkpeXl+6///48+7Vs2VIxMTFatmyZpk6dqoMHD6pdu3Y6d+5crttER0fLx8fHsQQFBRV3+QAAAHCR6ybwzpgxQ71795anp2ee/SIiIvTggw/q1ltvVXh4uL799lslJSXp888/z3WbkSNHKjk52bEcOXKkuMsHAACAi7h0Dm9+ff/999q3b58+++yzAm/r6+ur+vXr68CBA7n2sdvtstvtRSkRAAAAZdR1cYX3k08+UWhoqJo2bVrgbVNTUxUfH6/q1auXQGUAAAAo61waeFNTU7V9+3Zt375dknTw4EFt377d6SazlJQUffHFFxo4cGCO
+7j77rv1wQcfOF4PHz5ca9euVUJCgjZs2KCePXvK3d1dkZGRJXouAAAAKJtcOqVhy5Yt6tChg+N1VFSUJKlv376KiYmRJM2fP1/GmFwDa3x8vE6dOuV4/dtvvykyMlKnT5+Wn5+f2rZtq02bNsnPz6/kTgQAAABlls0YY1xdRFmTkpIiHx8fJScny9vb29XlAECJCh6xJN99E8Z3LcFKACD/CpLXros5vAAAAEBhEXgBAABgaQReAAAAWBqBFwAAAJZG4AUAAIClEXgBAABgaQReAAAAWBqBFwAAAJZG4AUAAIClEXgBAABgaQReAAAAWBqBFwAAAJZG4AUAAIClEXgBAABgaQReAAAAWBqBFwAAAJZG4AUAAIClEXgBAABgaQReAAAAWBqBFwAAAJZG4AUAAIClEXgBAABgaQReAAAAWBqBFwAAAJZG4AUAAIClEXgBAABgaQReAAAAWBqBFwAAAJZG4AUAAIClEXgBAABgaQReAAAAWBqBFwAAAJZG4AUAAIClEXgBAABgaQReAAAAWBqBFwAAAJZG4AUAAICluTTwrlu3Tt26dVNgYKBsNpsWLVrktL5fv36y2WxOS5cuXa653ylTpig4OFienp5q2bKlfvjhhxI6AwAAAJR1Lg28aWlpatq0qaZMmZJrny5duujYsWOO5f/+7//y3Odnn32mqKgojRo1Slu3blXTpk0VHh6uEydOFHf5AAAAuA6Uc+XBIyIiFBERkWcfu92ugICAfO9z4sSJeuKJJ9S/f39J0rRp07RkyRLNmDFDI0aMKFK9AHA9Cx6xxNUlAIBLlPk5vGvWrFG1atV08803a9CgQTp9+nSufS9duqSffvpJnTp1crS5ubmpU6dO2rhxY67bpaenKyUlxWkBAACANZTpwNulSxfNnj1bsbGxevvtt7V27VpFREQoIyMjx/6nTp1SRkaG/P39ndr9/f2VmJiY63Gio6Pl4+PjWIKCgor1PAAAAOA6Lp3ScC0PP/yw4/83adJEt956q+rUqaM1a9bo7rvvLrbjjBw5UlFRUY7XKSkphF4AAACLKNNXeK9Wu3ZtVa1aVQcOHMhxfdWqVeXu7q7jx487tR8/fjzPecB2u13e3t5OCwAAAKzhugq8v/32m06fPq3q1avnuN7Dw0OhoaGKjY11tGVmZio2NlatW7curTIBAABQhrg08Kampmr79u3avn27JOngwYPavn27Dh8+rNTUVL344ovatGmTEhISFBsbq+7du6tu3boKDw937OPuu+/WBx984HgdFRWl6dOna9asWdqzZ48GDRqktLQ0x1MbAAAA8Nfi0jm8W7ZsUYcOHRyvs+bR9u3bV1OnTtXPP/+sWbNmKSkpSYGBgercubPGjh0ru93u2CY+Pl6nTp1yvP773/+ukydP6vXXX1diYqKaNWumZcuWZbuRDQAAAH8NNmOMcXURZU1KSop8fHyUnJzMfF4AllEcz+FNGN+1GCoBgKIrSF67rubwAgAAAAVF4AUAAIClEXgBAABgaQReAAAAWBqBFwAAAJZG4AUAAIClEXgBAABgaQReAAAAWBqBFwAAAJZG4AUAAIClEXgBAABgaeVcXQAA4PoRPGJJju0J47uWciUAkH9c4QUAAIClEXgBAABgaQReAAAAWBqBFwAAAJZG4AUAAIClEXgBAABgaQReAAAAWBqBFwAAAJZG4AUAAIClEXgBAABgaQReAAAAWBqBFwAAAJZG4AUAAIClEXgBAABgaQReAAAAWBqBFwAAAJZG4AUAAIClEXgBAABgaQReAAAAWBqBFwAAAJZG4AUAAIClEXgBAABgaQReAAAAWBqBFwAAAJZG4AUAAICluTTwrlu3Tt26dVNgYKBsNpsWLVrkWHf58mW9/PLLatKkiSpWrKjAwED16dNHv//+e577HD16tGw2m9PSoEGDEj4TAAAAlFUuDbxpaWlq2rSppkyZkm3d+fPntXXrVr322mvaunWrFixYoH379um+++67
5n4bN26sY8eOOZb169eXRPkAAAC4DpRz5cEjIiIUERGR4zofHx+tWLHCqe2DDz5QixYtdPjwYdWsWTPX/ZYrV04BAQHFWisAAACuT9fVHN7k5GTZbDb5+vrm2W///v0KDAxU7dq11bt3bx0+fDjP/unp6UpJSXFaAAAAYA3XTeC9ePGiXn75ZUVGRsrb2zvXfi1btlRMTIyWLVumqVOn6uDBg2rXrp3OnTuX6zbR0dHy8fFxLEFBQSVxCgAAAHCB6yLwXr58WQ899JCMMZo6dWqefSMiIvTggw/q1ltvVXh4uL799lslJSXp888/z3WbkSNHKjk52bEcOXKkuE8BAAAALuLSObz5kRV2Dx06pFWrVuV5dTcnvr6+ql+/vg4cOJBrH7vdLrvdXtRSAQAAUAaV6Su8WWF3//79WrlypW688cYC7yM1NVXx8fGqXr16CVQIAACAss6lgTc1NVXbt2/X9u3bJUkHDx7U9u3bdfjwYV2+fFkPPPCAtmzZok8//VQZGRlKTExUYmKiLl265NjH3XffrQ8++MDxevjw4Vq7dq0SEhK0YcMG9ezZU+7u7oqMjCzt0wMAAEAZ4NIpDVu2bFGHDh0cr6OioiRJffv21ejRo/XNN99Ikpo1a+a03erVq9W+fXtJUnx8vE6dOuVY99tvvykyMlKnT5+Wn5+f2rZtq02bNsnPz69kTwYAAABlkksDb/v27WWMyXV9XuuyJCQkOL2eP39+UcsCAACAhZTpObwAAABAURF4AQAAYGkEXgAAAFgagRcAAACWRuAFAACApRF4AQAAYGkEXgAAAFgagRcAAACWRuAFAACApRF4AQAAYGkEXgAAAFgagRcAAACWRuAFAACApRF4AQAAYGkEXgAAAFgagRcAAACWVqjA+9///re46wAAAABKRKECb926ddWhQwfNnTtXFy9eLO6aAAAAgGJTqMC7detW3XrrrYqKilJAQICeeuop/fDDD8VdGwAAAFBkhQq8zZo10/vvv6/ff/9dM2bM0LFjx9S2bVvdcsstmjhxok6ePFncdQIAAACFUqSb1sqVK6f7779fX3zxhd5++20dOHBAw4cPV1BQkPr06aNjx44VV50AAABAoRQp8G7ZskXPPPOMqlevrokTJ2r48OGKj4/XihUr9Pvvv6t79+7FVScAAABQKOUKs9HEiRM1c+ZM7du3T/fee69mz56te++9V25uf+TnkJAQxcTEKDg4uDhrBQAAAAqsUIF36tSpevzxx9WvXz9Vr149xz7VqlXTJ598UqTiAAAAgKIqVOBdsWKFatas6biim8UYoyNHjqhmzZry8PBQ3759i6VIAAAAoLAKNYe3Tp06OnXqVLb2M2fOKCQkpMhFAQAAAMWlUIHXGJNje2pqqjw9PYtUEAAAAFCcCjSlISoqSpJks9n0+uuv64YbbnCsy8jI0ObNm9WsWbNiLRAAAAAoigIF3m3btkn64wrvzp075eHh4Vjn4eGhpk2bavjw4cVbIQAAAFAEBQq8q1evliT1799f77//vry9vUukKAAAAKC4FOopDTNnzizuOgAAxSR4xBJXlwAAZUq+A+/999+vmJgYeXt76/7778+z74IFC4pcGAAAAFAc8h14fXx8ZLPZHP8fAAAAuB7kO/D+eRoDUxoAAABwvSjUc3gvXLig8+fPO14fOnRIkyZN0nfffVdshQEAAADFoVCBt3v37po9e7YkKSkpSS1atNC7776r7t27a+rUqcVaIAAAAFAUhQq8W7duVbt27SRJX375pQICAnTo0CHNnj1b//rXv4q1QAAAAKAoChV4z58/Ly8vL0nSd999p/vvv19ubm5q1aqVDh06lO/9rFu3Tt26dVNgYKBsNpsWLVrktN4Yo9dff13Vq1dXhQoV1KlTJ+3fv/+a+50yZYqCg4Pl6empli1b6ocffijQ+QEAAMA6ChV469atq0WLFunIkSNavny5OnfuLEk6ceJEgb6MIi0tTU2bNtWUKVNy
XP/Pf/5T//rXvzRt2jRt3rxZFStWVHh4uC5evJjrPj/77DNFRUVp1KhR2rp1q5o2barw8HCdOHGiYCcJAAAASyhU4H399dc1fPhwBQcHq2XLlmrdurWkP672Nm/ePN/7iYiI0JtvvqmePXtmW2eM0aRJk/Tqq6+qe/fuuvXWWzV79mz9/vvv2a4E/9nEiRP1xBNPqH///mrUqJGmTZumG264QTNmzCjweQIAAOD6V6jA+8ADD+jw4cPasmWLli1b5mi/++679d577xVLYQcPHlRiYqI6derkaPPx8VHLli21cePGHLe5dOmSfvrpJ6dt3Nzc1KlTp1y3kaT09HSlpKQ4LQAAALCGQgVeSQoICFDz5s3l5va/XbRo0UINGjQolsISExMlSf7+/k7t/v7+jnVXO3XqlDIyMgq0jSRFR0fLx8fHsQQFBRWxegAAAJQV+f7iiT9LS0vT+PHjFRsbqxMnTigzM9Np/X//+99iKa60jBw5UlFRUY7XKSkphF4AAACLKFTgHThwoNauXavHHntM1atXd3zlcHEKCAiQJB0/flzVq1d3tB8/flzNmjXLcZuqVavK3d1dx48fd2o/fvy4Y385sdvtstvtRS8aAAAAZU6hAu/SpUu1ZMkS3XnnncVdj0NISIgCAgIUGxvrCLgpKSnavHmzBg0alOM2Hh4eCg0NVWxsrHr06CFJyszMVGxsrIYMGVJitQIAAKDsKlTgrVy5sqpUqVLkg6empurAgQOO1wcPHtT27dtVpUoV1axZU0OHDtWbb76pevXqKSQkRK+99poCAwMdYVb640a5nj17OgJtVFSU+vbtq9tvv10tWrTQpEmTlJaWpv79+xe5XgAAAFx/ChV4x44dq9dff12zZs3SDTfcUOiDb9myRR06dHC8zppH27dvX8XExOill15SWlqannzySSUlJalt27ZatmyZPD09HdvEx8fr1KlTjtd///vfdfLkSb3++utKTExUs2bNtGzZsmw3sgEAik/wiCXZ2hLGd3VBJQCQnc0YYwq6UfPmzRUfHy9jjIKDg1W+fHmn9Vu3bi22Al0hJSVFPj4+Sk5OLtAXaQBAWZBT+HQFAi+AklSQvFaoK7x/nlIAAAAAlGWFCryjRo0q7joAAACAElHoL55ISkrSxx9/rJEjR+rMmTOS/pjKcPTo0WIrDgAAACiqQl3h/fnnn9WpUyf5+PgoISFBTzzxhKpUqaIFCxbo8OHDmj17dnHXCQAAABRKoa7wRkVFqV+/ftq/f7/TExPuvfderVu3rtiKAwAAAIqqUIH3xx9/1FNPPZWt/aabblJiYmKRiwIAAACKS6ECr91uV0pKSrb2X3/9VX5+fkUuCgAAACguhQq89913n9544w1dvnxZkmSz2XT48GG9/PLL6tWrV7EWCAAAABRFoQLvu+++q9TUVPn5+enChQsKCwtT3bp15eXlpXHjxhV3jQAAAEChFeopDT4+PlqxYoXi4uK0Y8cOpaam6rbbblOnTp2Kuz4AAACgSAoceDMzMxUTE6MFCxYoISFBNptNISEhCggIkDFGNputJOoEAAAACqVAUxqMMbrvvvs0cOBAHT16VE2aNFHjxo116NAh9evXTz179iypOgEAAIBCKdAV3piYGK1bt06xsbHq0KGD07pVq1apR48emj17tvr06VOsRQIAAACFVaArvP/3f/+nV155JVvYlaSOHTtqxIgR+vTTT4utOAAAAKCoChR4f/75Z3Xp0iXX9REREdqxY0eRiwIAAACKS4EC75kzZ+Tv75/ren9/f509e7bIRQEAAADFpUCBNyMjQ+XK5T7t193dXVeuXClyUQAAAEBxKdBNa8YY9evXT3a7Pcf16enpxVIUAAAAUFwKFHj79u17zT48oQEAAABlSYEC78yZM0uqDgAAAKBEFGgOLwAAAHC9IfACAADA0gi8AAAAsDQCLwAAACyNwAsAAABLI/ACAADA0gi8AAAAsDQCLwAAACyNwAsAAABL
I/ACAADA0gi8AAAAsDQCLwAAACyNwAsAAABLI/ACAADA0gi8AAAAsDQCLwAAACytzAfe4OBg2Wy2bMvgwYNz7B8TE5Otr6enZylXDQAAgLKinKsLuJYff/xRGRkZjte7du3SPffcowcffDDXbby9vbVv3z7Ha5vNVqI1AgAAoOwq84HXz8/P6fX48eNVp04dhYWF5bqNzWZTQEBASZcGAACA60CZn9LwZ5cuXdLcuXP1+OOP53nVNjU1VbVq1VJQUJC6d++u3bt357nf9PR0paSkOC0AAACwhusq8C5atEhJSUnq169frn1uvvlmzZgxQ19//bXmzp2rzMxMtWnTRr/99luu20RHR8vHx8exBAUFlUD1AAAAcAWbMca4uoj8Cg8Pl4eHh/7zn//ke5vLly+rYcOGioyM1NixY3Psk56ervT0dMfrlJQUBQUFKTk5Wd7e3kWuGwBKU/CIJa4uQZKUML6rq0sAYGEpKSny8fHJV14r83N4sxw6dEgrV67UggULCrRd+fLl1bx5cx04cCDXPna7XXa7vaglAgAAoAy6bqY0zJw5U9WqVVPXrgW7YpCRkaGdO3eqevXqJVQZAAAAyrLr4gpvZmamZs6cqb59+6pcOeeS+/Tpo5tuuknR0dGSpDfeeEOtWrVS3bp1lZSUpHfeeUeHDh3SwIEDXVE6AJSosjJ9AQDKsusi8K5cuVKHDx/W448/nm3d4cOH5eb2vwvVZ8+e1RNPPKHExERVrlxZoaGh2rBhgxo1alSaJQMAAKCMuK5uWistBZkEDQCuVJav8HLTGoCSVJC8dt3M4QUAAAAKg8ALAAAASyPwAgAAwNIIvAAAALA0Ai8AAAAsjcALAAAASyPwAgAAwNIIvAAAALA0Ai8AAAAsjcALAAAASyPwAgAAwNIIvAAAALC0cq4uAABgTcEjluTYnjC+aylXAuCvjiu8AAAAsDQCLwAAACyNwAsAAABLI/ACAADA0gi8AAAAsDQCLwAAACyNwAsAAABLI/ACAADA0gi8AAAAsDQCLwAAACyNwAsAAABLI/ACAADA0gi8AAAAsDQCLwAAACyNwAsAAABLI/ACAADA0gi8AAAAsDQCLwAAACyNwAsAAABLI/ACAADA0gi8AAAAsDQCLwAAACyNwAsAAABLI/ACAADA0sp04B09erRsNpvT0qBBgzy3+eKLL9SgQQN5enqqSZMm+vbbb0upWgAAAJRFZTrwSlLjxo117Ngxx7J+/fpc+27YsEGRkZEaMGCAtm3bph49eqhHjx7atWtXKVYMAACAsqTMB95y5copICDAsVStWjXXvu+//766dOmiF198UQ0bNtTYsWN122236YMPPijFigEAAFCWlPnAu3//fgUGBqp27drq3bu3Dh8+nGvfjRs3qlOnTk5t4eHh2rhxY57HSE9PV0pKitMCAAAAayjTgbdly5aKiYnRsmXLNHXqVB08eFDt2rXTuXPncuyfmJgof39/pzZ/f38lJibmeZzo6Gj5+Pg4lqCgoGI7BwAAALhWmQ68ERERevDBB3XrrbcqPDxc3377rZKSkvT5558X63FGjhyp5ORkx3LkyJFi3T8AAABcp5yrCygIX19f1a9fXwcOHMhxfUBAgI4fP+7Udvz4cQUEBOS5X7vdLrvdXmx1AgAAoOwo01d4r5aamqr4+HhVr149x/WtW7dWbGysU9uKFSvUunXr0igPAAAAZVCZDrzDhw/X2rVrlZCQoA0bNqhnz55yd3dXZGSkJKlPnz4aOXKko//zzz+vZcuW6d1339XevXs1evRobdmyRUOGDHHVKQAAAMDFyvSUht9++02RkZE6ffq0/Pz81LZtW23atEl+fn6SpMOHD8vN7X+ZvU2bNpo3b55effVVvfLKK6pXr54WLVqkW265xVWnAAAAABezGWOMq4soa1JSUuTj46Pk5GR5e3u7uhwAUPCIJa4uodgkjO/q6hIAWEBB8lqZntIAAAAAFBWBFwAAAJZG4AUA
AIClEXgBAABgaQReAAAAWBqBFwAAAJZWpp/DCwCwnpwescajygCUJK7wAgAAwNIIvAAAALA0Ai8AAAAsjcALAAAASyPwAgAAwNIIvAAAALA0Ai8AAAAsjcALAAAASyPwAgAAwNIIvAAAALA0Ai8AAAAsjcALAAAASyPwAgAAwNIIvAAAALA0Ai8AAAAsjcALAAAASyPwAgAAwNIIvAAAALA0Ai8AAAAsjcALAAAASyPwAgAAwNIIvAAAALC0cq4uAADwP8Ejlri6BACwHK7wAgAAwNIIvAAAALA0Ai8AAAAsjcALAAAASyPwAgAAwNIIvAAAALA0HksGAHC53B7HljC+aylXAsCKyvQV3ujoaN1xxx3y8vJStWrV1KNHD+3bty/PbWJiYmSz2ZwWT0/PUqoYAAAAZU2ZDrxr167V4MGDtWnTJq1YsUKXL19W586dlZaWlud23t7eOnbsmGM5dOhQKVUMAACAsqZMT2lYtmyZ0+uYmBhVq1ZNP/30k+66665ct7PZbAoICCjp8gAAAHAdKNNXeK+WnJwsSapSpUqe/VJTU1WrVi0FBQWpe/fu2r17d57909PTlZKS4rQAAADAGq6bwJuZmamhQ4fqzjvv1C233JJrv5tvvlkzZszQ119/rblz5yozM1Nt2rTRb7/9lus20dHR8vHxcSxBQUElcQoAAABwAZsxxri6iPwYNGiQli5dqvXr16tGjRr53u7y5ctq2LChIiMjNXbs2Bz7pKenKz093fE6JSVFQUFBSk5Olre3d5FrB4D8yu1pBX9VPKUBQG5SUlLk4+OTr7xWpufwZhkyZIgWL16sdevWFSjsSlL58uXVvHlzHThwINc+drtddru9qGUCAACgDCrTUxqMMRoyZIgWLlyoVatWKSQkpMD7yMjI0M6dO1W9evUSqBAAAABlXZm+wjt48GDNmzdPX3/9tby8vJSYmChJ8vHxUYUKFSRJffr00U033aTo6GhJ0htvvKFWrVqpbt26SkpK0jvvvKNDhw5p4MCBLjsPAAAAuE6ZDrxTp06VJLVv396pfebMmerXr58k6fDhw3Jz+9+F6rNnz+qJJ55QYmKiKleurNDQUG3YsEGNGjUqrbIBAABQhlw3N62VpoJMggaA4sRNa864aQ1AbgqS18r0HF4AAACgqAi8AAAAsDQCLwAAACytTN+0BgBWxnxdACgdXOEFAACApRF4AQAAYGkEXgAAAFgac3gBAGVWbvOceT4vgILgCi8AAAAsjcALAAAASyPwAgAAwNIIvAAAALA0Ai8AAAAsjcALAAAASyPwAgAAwNIIvAAAALA0vngCAEpYbl+eAAAoHVzhBQAAgKUReAEAAGBpTGkAAFx3cpomkjC+qwsqAXA94AovAAAALI3ACwAAAEsj8AIAAMDSCLwAAACwNAIvAAAALI2nNABAMeJLJgCg7CHwAgAsIbd/bPC4MgBMaQAAAIClEXgBAABgaQReAAAAWBpzeAGgELg5DQCuHwReAIClcTMbAKY0AAAAwNK4wgsAeWDqAgBc/wi8AIC/pJz+McM0B8CaCLwA8P9xNRfM9wWsicAL4C+HYAsAfy3XReCdMmWK3nnnHSUmJqpp06aaPHmyWrRokWv/L774Qq+99poSEhJUr149vf3227r33ntLsWIAZQHBFsWlIL9LXA0Gyp4yH3g/++wzRUVFadq0aWrZsqUmTZqk8PBw7du3T9WqVcvWf8OGDYqMjFR0dLT+9re/ad68eerRo4e2bt2qW265xQVnAKA4EWJR1hGOgbLHZowxri4iLy1bttQdd9yhDz74QJKUmZmpoKAgPfvssxoxYkS2/n//+9+VlpamxYsXO9patWqlZs2aadq0afk6ZkpKinx8fJScnCxvb+/iOREAhFWgiAjIwP8UJK+V6Su8ly5d0k8//aSRI0c62tzc3NSpUydt3Lgxx202btyoqKgop7bw8HAtWrQo1+Okp6crPT3d8To5OVnSHwMJlFW3jFru
6hIAlLKaw75wdQnFYteYcFeXAAvIymn5uXZbpgPvqVOnlJGRIX9/f6d2f39/7d27N8dtEhMTc+yfmJiY63Gio6M1ZsyYbO1BQUGFqBoAAOTFZ5KrK4CVnDt3Tj4+Pnn2KdOBt7SMHDnS6apwZmamzpw5oxtvvFE2m61Ua0lJSVFQUJCOHDnCdIpcMEZ5Y3zyxvhcG2OUN8Ynb4zPtTFGecvv+BhjdO7cOQUGBl5zn2U68FatWlXu7u46fvy4U/vx48cVEBCQ4zYBAQEF6i9Jdrtddrvdqc3X17dwRRcTb29v3gTXwBjljfHJG+NzbYxR3hifvDE+18YY5S0/43OtK7tZ3IqjoJLi4eGh0NBQxcbGOtoyMzMVGxur1q1b57hN69atnfpL0ooVK3LtDwAAAGsr01d4JSkqKkp9+/bV7bffrhYtWmjSpElKS0tT//79JUl9+vTRTTfdpOjoaEnS888/r7CwML377rvq2rWr5s+fry1btujf//63K08DAAAALlLmA+/f//53nTx5Uq+//roSExPVrFkzLVu2zHFj2uHDh+Xm9r8L1W3atNG8efP06quv6pVXXlG9evW0aNGi6+YZvHa7XaNGjco2xQL/wxjljfHJG+NzbYxR3hifvDE+18YY5a0kxqfMP4cXAAAAKIoyPYcXAAAAKCoCLwAAACyNwAsAAABLI/ACAADA0gi8ZcCZM2fUu3dveXt7y9fXVwMGDFBqamq+tjXGKCIiQjabTYsWLSrZQl2kMOPz1FNPqU6dOqpQoYL8/PzUvXv3XL+O2goKOkZnzpzRs88+q5tvvlkVKlRQzZo19dxzzyk5ObkUqy49hfkd+ve//6327dvL29tbNptNSUlJpVNsKZkyZYqCg4Pl6empli1b6ocffsiz/xdffKEGDRrI09NTTZo00bfffltKlbpGQcZn9+7d6tWrl4KDg2Wz2TRp0qTSK9RFCjI+06dPV7t27VS5cmVVrlxZnTp1uubvmxUUZIwWLFig22+/Xb6+vqpYsaKaNWumOXPmlGK1pa+gn0FZ5s+fL5vNph49ehToeATeMqB3797avXu3VqxYocWLF2vdunV68skn87XtpEmTSv3rj0tbYcYnNDRUM2fO1J49e7R8+XIZY9S5c2dlZGSUUtWlq6Bj9Pvvv+v333/XhAkTtGvXLsXExGjZsmUaMGBAKVZdegrzO3T+/Hl16dJFr7zySilVWXo+++wzRUVFadSoUdq6dauaNm2q8PBwnThxIsf+GzZsUGRkpAYMGKBt27apR48e6tGjh3bt2lXKlZeOgo7P+fPnVbt2bY0fPz7Pb/W0ioKOz5o1axQZGanVq1dr48aNCgoKUufOnXX06NFSrrz0FHSMqlSpon/84x/auHGjfv75Z/Xv31/9+/fX8uXLS7ny0lHQ8cmSkJCg4cOHq127dgU/qIFL/fLLL0aS+fHHHx1tS5cuNTabzRw9ejTPbbdt22Zuuukmc+zYMSPJLFy4sISrLX1FGZ8/27Fjh5FkDhw4UBJlulRxjdHnn39uPDw8zOXLl0uiTJcp6visXr3aSDJnz54twSpLV4sWLczgwYMdrzMyMkxgYKCJjo7Osf9DDz1kunbt6tTWsmVL89RTT5Vona5S0PH5s1q1apn33nuvBKtzvaKMjzHGXLlyxXh5eZlZs2aVVIkuV9QxMsaY5s2bm1dffbUkynO5wozPlStXTJs2bczHH39s+vbta7p3716gY3KF18U2btwoX19f3X777Y62Tp06yc3NTZs3b851u/Pnz+uRRx7RlClTLH1FobDj82dpaWmaOXOmQkJCFBQUVFKlukxxjJEkJScny9vbW+XKlfnvoymQ4hofq7h06ZJ++uknderUydHm5uamTp06aePGjTlus3HjRqf+khQeHp5r/+tZYcbnr6Q4xuf8+fO6fPmyqlSpUlJlulRRx8gYo9jYWO3bt0933XVXSZbqEoUdnzfeeEPVqlUr9F8iCbwulpiYqGrV
qjm1lStXTlWqVFFiYmKu2w0bNkxt2rRR9+7dS7pElyrs+EjShx9+qEqVKqlSpUpaunSpVqxYIQ8Pj5Is1yWKMkZZTp06pbFjx+Z7Ks31pDjGx0pOnTqljIwMx7dVZvH39891PBITEwvU/3pWmPH5KymO8Xn55ZcVGBiY7R9RVlHYMUpOTlalSpXk4eGhrl27avLkybrnnntKutxSV5jxWb9+vT755BNNnz690Mcl8JaQESNGyGaz5bkU9iaqb775RqtWrbqub4woyfHJ0rt3b23btk1r165V/fr19dBDD+nixYvFdAYlrzTGSJJSUlLUtWtXNWrUSKNHjy564aWktMYHQPEZP3685s+fr4ULF8rT09PV5ZQpXl5e2r59u3788UeNGzdOUVFRWrNmjavLcrlz587pscce0/Tp01W1atVC78daf7ssQ1544QX169cvzz61a9dWQEBAtknaV65c0ZkzZ3KdqrBq1SrFx8fL19fXqb1Xr15q167ddfEGKcnxyeLj4yMfHx/Vq1dPrVq1UuXKlbVw4UJFRkYWtfxSURpjdO7cOXXp0kVeXl5auHChypcvX9SyS01pjI8VVa1aVe7u7jp+/LhT+/Hjx3Mdj4CAgAL1v54VZnz+SooyPhMmTND48eO1cuVK3XrrrSVZpksVdozc3NxUt25dSVKzZs20Z88eRUdHq3379iVZbqkr6PjEx8crISFB3bp1c7RlZmZK+uOvdfv27VOdOnWueVwCbwnx8/OTn5/fNfu1bt1aSUlJ+umnnxQaGirpj0CbmZmpli1b5rjNiBEjNHDgQKe2Jk2a6L333nP6hSjLSnJ8cmKMkTFG6enpha65tJX0GKWkpCg8PFx2u13ffPPNdXe1pbR/h6zCw8NDoaGhio2NdTzWJzMzU7GxsRoyZEiO27Ru3VqxsbEaOnSoo23FihVq3bp1KVRcugozPn8lhR2ff/7znxo3bpyWL1/uNJ/eiorrdygzM/O6+m9WfhV0fBo0aKCdO3c6tb366qs6d+6c3n///fzfm1Oo2+tQrLp06WKaN29uNm/ebNavX2/q1atnIiMjHet/++03c/PNN5vNmzfnug9Z9CkNxhR8fOLj481bb71ltmzZYg4dOmTi4uJMt27dTJUqVczx48dddRolqqBjlJycbFq2bGmaNGliDhw4YI4dO+ZYrly54qrTKDGFeY8dO3bMbNu2zUyfPt1IMuvWrTPbtm0zp0+fdsUpFKv58+cbu91uYmJizC+//GKefPJJ4+vraxITE40xxjz22GNmxIgRjv5xcXGmXLlyZsKECWbPnj1m1KhRpnz58mbnzp2uOoUSVdDxSU9PN9u2bTPbtm0z1atXN8OHDzfbtm0z+/fvd9UplKiCjs/48eONh4eH+fLLL50+a86dO+eqUyhxBR2jt956y3z33XcmPj7e/PLLL2bChAmmXLlyZvr06a46hRJV0PG5WmGe0kDgLQNOnz5tIiMjTaVKlYy3t7fp37+/0wfBwYMHjSSzevXqXPdh5cBb0PE5evSoiYiIMNWqVTPly5c3NWrUMI888ojZu3evi86g5BV0jLIetZXTcvDgQdecRAkqzHts1KhROY7PzJkzS/8ESsDkyZNNzZo1jYeHh2nRooXZtGmTY11YWJjp27evU//PP//c1K9f33h4eJjGjRubJUuWlHLFpasg45P1+3P1EhYWVvqFl5KCjE+tWrVyHJ9Ro0aVfuGlqCBj9I9//MPUrVvXeHp6msqVK5vWrVub+fPnu6Dq0lPQz6A/K0zgtRljTP6uBQMAAADXH57SAAAAAEsj8AIAAMDSCLwAAACwNAIvAAAALI3ACwAAAEsj8AIAAMDSCLwAAACwNAIvAAAALI3ACwBlyJo1a2Sz2ZSUlJTvbUaPHq1mzZqVWE0FFRwcrEmTJrm6DABwIPACQCFMmzZNXl5eunLliqMtNTVV5cuXV/v27Z36ZoXY+Pj4a+63TZs2OnbsmHx8fIq13vbt22vo
0KF59mnSpImefvrpHNfNmTNHdrtdp06dKta6AKA0EHgBoBA6dOig1NRUbdmyxdH2/fffKyAgQJs3b9bFixcd7atXr1bNmjVVp06da+7Xw8NDAQEBstlsJVJ3XgYMGKD58+frwoUL2dbNnDlT9913n6pWrVrqdQFAURF4AaAQbr75ZlWvXl1r1qxxtK1Zs0bdu3dXSEiINm3a5NTeoUMHSVJmZqaio6MVEhKiChUqqGnTpvryyy+d+l49pWH69OkKCgrSDTfcoJ49e2rixIny9fXNVtOcOXMUHBwsHx8fPfzwwzp37pwkqV+/flq7dq3ef/992Ww22Ww2JSQkZNv+0Ucf1YULF/TVV185tR88eFBr1qzRgAEDFB8fr+7du8vf31+VKlXSHXfcoZUrV+Y6TgkJCbLZbNq+fbujLSkpSTabzWnsdu3apYiICFWqVEn+/v567LHHuJoMoNgQeAGgkDp06KDVq1c7Xq9evVrt27dXWFiYo/3ChQvavHmzI/BGR0dr9uzZmjZtmnbv3q1hw4bp0Ucf1dq1a3M8RlxcnJ5++mk9//zz2r59u+655x6NGzcuW7/4+HgtWrRIixcv1uLFi7V27VqNHz9ekvT++++rdevWeuKJJ3Ts2DEdO3ZMQUFB2fZRtWpVde/eXTNmzHBqj4mJUY0aNdS5c2elpqbq3nvvVWxsrLZt26YuXbqoW7duOnz4cOEGUX8E4I4dO6p58+basmWLli1bpuPHj+uhhx4q9D4B4M/KuboAALhedejQQUOHDtWVK1d04cIFbdu2TWFhYbp8+bKmTZsmSdq4caPS09PVoUMHpaen66233tLKlSvVunVrSVLt2rW1fv16ffTRRwoLC8t2jMmTJysiIkLDhw+XJNWvX18bNmzQ4sWLnfplZmYqJiZGXl5ekqTHHntMsbGxGjdunHx8fOTh4aEbbrhBAQEBeZ7TgAEDFBERoYMHDyokJETGGM2aNUt9+/aVm5ubmjZtqqZNmzr6jx07VgsXLtQ333yjIUOGFGocP/jgAzVv3lxvvfWWo23GjBkKCgrSr7/+qvr16xdqvwCQhSu8AFBI7du3V1pamn788Ud9//33ql+/vvz8/BQWFuaYx7tmzRrVrl1bNWvW1IEDB3T+/Hndc889qlSpkmOZPXt2rje07du3Ty1atHBqu/q19MeTEbLCriRVr15dJ06cKPA53XPPPapRo4ZmzpwpSYqNjdXhw4fVv39/SX/cmDd8+HA1bNhQvr6+qlSpkvbs2VOkK7w7duzQ6tWrncakQYMGkpSvG/0A4Fq4wgsAhVS3bl3VqFFDq1ev1tmzZx1XaAMDAxUUFKQNGzZo9erV6tixo6Q/wqIkLVmyRDfddJPTvux2e5FqKV++vNNrm82mzMzMAu/Hzc1N/fr106xZszR69GjNnDlTHTp0UO3atSVJw4cP14oVKzRhwgTVrVtXFSpU0AMPPKBLly7luj9JMsY42i5fvuzUJzU1Vd26ddPbb7+dbfvq1asX+BwA4GoEXgAogg4dOmjNmjU6e/asXnzxRUf7XXfdpaVLl+qHH37QoEGDJEmNGjWS3W7X4cOHc5y+kJObb75ZP/74o1Pb1a/zw8PDQxkZGfnq279/f7355ptasGCBFi5cqI8//tixLi4uTv369VPPnj0l/RFWc7oBLoufn58k6dixY2revLkkOd3AJkm33XabvvrqKwUHB6tcOf6zBKD4MaUBAIqgQ4cOWr9+vbZv3+4UYsPCwvTRRx/p0qVLjhvWvLy8NHz4cA0bNkyzZs1SfHy8tm7dqsmTJ2vWrFk57v/ZZ5/Vt99+q4kTJ2r//v366KOPtHTp0gI/tiw4OFibN29WQkKCTp06lefV35CQEHXs2FFPPvmk7Ha77r//fse6evXqacGCBdq+fbt27NihRx55JM99VahQQa1atdL48eO1Z88erV27Vq+++qpTn8GDB+vMmTOKjIzUjz/+qPj4eC1fvlz9+/fPd0gHgLwQ
eAGgCDp06KALFy6obt268vf3d7SHhYXp3LlzjseXZRk7dqxee+01RUdHq2HDhurSpYuWLFmikJCQHPd/5513atq0aZo4caKaNm2qZcuWadiwYfL09CxQncOHD5e7u7saNWokPz+/a865HTBggM6ePatHHnnE6VgTJ05U5cqV1aZNG3Xr1k3h4eG67bbb8tzXjBkzdOXKFYWGhmro0KF68803ndYHBgYqLi5OGRkZ6ty5s5o0aaKhQ4fK19fXMSUCAIrCZv48sQoAUOY98cQT2rt3r77//ntXlwIA1wUmSwFAGTdhwgTdc889qlixopYuXapZs2bpww8/dHVZAHDd4AovAJRxDz30kNasWaNz586pdu3aevbZZ/X000+7uiwAuG4QeAEAAGBp3A0AAAAASyPwAgAAwNIIvAAAALA0Ai8AAAAsjcALAAAASyPwAgAAwNIIvAAAALA0Ai8AAAAs7f8Bz/nvVePChtAAAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 800x500 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "\n",
    "layer_name = \"model.layers.19.self_attn.q_proj\"  \n",
    "layer = dict(model.named_modules())[layer_name]\n",
    "weights = layer.weight.flatten().cpu().detach().numpy()\n",
    "\n",
    "plt.figure(figsize=(8, 5))\n",
    "plt.hist(weights, bins=100, density=True)\n",
    "plt.title(f\"Weight Distribution of {layer_name}\")\n",
    "plt.xlabel(\"Weight Value\")\n",
    "plt.ylabel(\"Density\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2a06bcf0-77f8-4769-87e1-bd767383d9c7",
   "metadata": {},
   "source": [
    "## Step3. 开始微调！"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7af883af-86ed-47c5-9dcd-a1b0dd95310c",
   "metadata": {},
   "source": [
    "## 数据集的下载、处理和封装\n",
    "\n",
    "这里使用一个认知数据集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0092c0f6-2f7d-4d4a-b19e-deec1ea27f40",
   "metadata": {},
   "outputs": [],
   "source": [
    "!modelscope download --dataset \"xitaosun/self-cognition\" --local_dir '/root/autodl-tmp/data'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "bd1f398d-97d6-4e4f-a817-ad7360c6d99f",
   "metadata": {},
   "outputs": [],
   "source": [
    "from torch.utils.data import Dataset, DataLoader\n",
    "import json\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
    "from torch.utils.data import DataLoader, Dataset\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "d6b19295-0dfe-49d0-a3ca-da6c3cf2e37f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using device: cuda\n"
     ]
    }
   ],
   "source": [
    "# 设置设备和超参数\n",
    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
    "torch_dtype = torch.float16\n",
    "model_path = \"/root/autodl-tmp/model/qwen\"\n",
    "lora_rank = 8  # LoRA 秩\n",
    "lora_alpha = 16.0  # LoRA 缩放因子\n",
    "learning_rate = 5e-5\n",
    "batch_size = 10\n",
    "max_length = 50\n",
    "num_epochs = 10\n",
    "\n",
    "print(f\"Using device: {device}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "09c35a42-adce-4226-884c-15e38051bbfb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model and tokenizer loaded successfully!\n"
     ]
    }
   ],
   "source": [
    "# 加载模型\n",
    "model = AutoModelForCausalLM.from_pretrained(\n",
    "    model_path,\n",
    "    torch_dtype=torch_dtype,\n",
    "    device_map=\"auto\"\n",
    ")\n",
    "\n",
    "# 加载分词器\n",
    "tokenizer = AutoTokenizer.from_pretrained(model_path)\n",
    "\n",
    "print(\"Model and tokenizer loaded successfully!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "2749e9aa-b176-4cce-9890-902ee1cbbc23",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loaded 110 samples from /root/autodl-tmp/data/self_cognition.jsonl\n",
      "Sample data (first 3):\n",
      "Sample 1: Query: 你好 Response: <think>嗯，用户说“你好”，这是一个常见的中文问候，我应该回应礼貌的问候。首先，确认用户的需求是什么，可能只是打个招呼，或者有具体问题要问。接下来要考虑用中文回答，保持友好和自然。可能需要进一步询问是否需要帮助，但不要过于冗长。另外，注意格式要求，不需要...\n",
      "Sample 2: Query: 你是？ Response: <think>我是{{NAME}}，由{{AUTHOR}}训练的人工智能助手。我的目标是为用户提供有用、准确和及时的信息，并通过各种方式帮助用户进行有效的沟通。请告诉我有什么可以帮助您的呢？</think>\n",
      "我是{{NAME}}，由{{AUTHOR}}训练的...\n",
      "Sample 3: Query: 你是谁! Response: <think>您好！我是{{AUTHOR}}开发的人工智能语言模型，名为{{NAME}}。我可以回答您的问题、提供信息、进行对话并帮助解决问题。如果您有任何疑问或需要帮助，请随时告诉我！</think>\n",
      "您好！我是{{AUTHOR}}开发的人工智能语言模型...\n"
     ]
    }
   ],
   "source": [
    "jsonl_path = \"/root/autodl-tmp/data/self_cognition.jsonl\"\n",
    "\n",
    "# 加载 JSONL 数据\n",
    "train_texts = []\n",
    "try:\n",
    "    with open(jsonl_path, \"r\", encoding=\"utf-8\") as f:\n",
    "        for line in f:\n",
    "            try:\n",
    "                item = json.loads(line.strip())\n",
    "                if \"query\" in item and \"response\" in item:\n",
    "                    text = f\"Query: {item['query']} Response: {item['response']}\"\n",
    "                    train_texts.append(text)\n",
    "                else:\n",
    "                    pass\n",
    "            except json.JSONDecodeError as e:\n",
    "                print(e)\n",
    "    \n",
    "    # 验证数据\n",
    "    if not train_texts:\n",
    "        raise ValueError(f\"No valid 'query' or 'response' fields found in {jsonl_path} or file is empty\")\n",
    "    print(f\"Loaded {len(train_texts)} samples from {jsonl_path}\")\n",
    "    \n",
    "    # 显示前几条数据\n",
    "    print(\"Sample data (first 3):\")\n",
    "    for i, text in enumerate(train_texts[:3]):\n",
    "        print(f\"Sample {i+1}: {text[:150]}...\")\n",
    "\n",
    "except FileNotFoundError:\n",
    "    print(f\"Error: File {jsonl_path} not found. Please check the path.\")\n",
    "except Exception as e:\n",
    "    print(f\"Error: {str(e)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "b608c780-964e-4a97-8de9-0dff6ad40816",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dataset size: 110\n",
      "DataLoader batches: 11\n"
     ]
    }
   ],
   "source": [
    "class TextDataset(Dataset):\n",
    "    def __init__(self, texts, tokenizer, max_length):\n",
    "        self.tokenizer = tokenizer\n",
    "        self.max_length = max_length\n",
    "        self.encodings = tokenizer(\n",
    "            texts,\n",
    "            truncation=True,\n",
    "            padding=\"max_length\",\n",
    "            max_length=max_length,\n",
    "            return_tensors=\"pt\"\n",
    "        )\n",
    "    \n",
    "    def __len__(self):\n",
    "        return len(self.encodings[\"input_ids\"])\n",
    "    \n",
    "    def __getitem__(self, idx):\n",
    "        # 将 padding 位置的 label 置为 -100，使损失函数忽略这些位置\n",
    "        labels = self.encodings[\"input_ids\"][idx].clone()\n",
    "        labels[self.encodings[\"attention_mask\"][idx] == 0] = -100\n",
    "        return {\n",
    "            \"input_ids\": self.encodings[\"input_ids\"][idx],\n",
    "            \"attention_mask\": self.encodings[\"attention_mask\"][idx],\n",
    "            \"labels\": labels\n",
    "        }\n",
    "\n",
    "# 创建数据集和 DataLoader\n",
    "dataset = TextDataset(train_texts, tokenizer, max_length)\n",
    "dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)\n",
    "print(f\"Dataset size: {len(dataset)}\")\n",
    "print(f\"DataLoader batches: {len(dataloader)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "7e8fc215-38f1-41f3-a7a5-0732ad651367",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sample batch shapes:\n",
      "input_ids: torch.Size([10, 50])\n",
      "attention_mask: torch.Size([10, 50])\n",
      "labels: torch.Size([10, 50])\n",
      "Decoded sample: Query: 你是？ Response: <think>我是一个名为{{NAME}}的人工智能，由{{AUTHOR}}开发而成。我不仅可以回答各种问题，还能进行有趣的对话。如果您有任何问题或想要讨论的话题，请随时和我交流\n",
      "Decoded label: Query: 你是？ Response: <think>我是一个名为{{NAME}}的人工智能，由{{AUTHOR}}开发而成。我不仅可以回答各种问题，还能进行有趣的对话。如果您有任何问题或想要讨论的话题，请随时和我交流\n"
     ]
    }
   ],
   "source": [
    "# Inspect one encoded batch: shapes plus a decoded sample\n",
    "sample_batch = next(iter(dataloader))\n",
    "print(\"Sample batch shapes:\")\n",
    "print(f\"input_ids: {sample_batch['input_ids'].shape}\")\n",
    "print(f\"attention_mask: {sample_batch['attention_mask'].shape}\")\n",
    "print(f\"labels: {sample_batch['labels'].shape}\")\n",
    "\n",
    "# Decode the first sample. Bug fix: the label line previously re-printed\n",
    "# sample_text instead of sample_label, so the two lines were always identical.\n",
    "sample_text = tokenizer.decode(sample_batch[\"input_ids\"][0], skip_special_tokens=True)\n",
    "sample_label = tokenizer.decode(sample_batch[\"labels\"][0], skip_special_tokens=True)\n",
    "print(f\"Decoded sample: {sample_text}\")\n",
    "print(f\"Decoded label: {sample_label}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "970ddfd9-b18e-4060-a7a1-65a837b2845f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# LoRA 线性层\n",
    "class LoRALinear(nn.Module):\n",
    "    def __init__(self, in_features, out_features, rank, alpha=1.0, device=\"cuda\", dtype=torch.float16):\n",
    "        super().__init__()\n",
    "        self.rank = rank\n",
    "        self.alpha = alpha\n",
    "        self.device = device\n",
    "        self.dtype = dtype\n",
    "        \n",
    "        # 原始权重（冻结）\n",
    "        self.weight = nn.Parameter(torch.empty(out_features, in_features, device=device, dtype=dtype), requires_grad=False)\n",
    "        # LoRA 参数\n",
    "        self.lora_A = nn.Parameter(torch.randn(rank, in_features, device=device, dtype=dtype))\n",
    "        self.lora_B = nn.Parameter(torch.zeros(out_features, rank, device=device, dtype=dtype))\n",
    "        \n",
    "        # 初始化\n",
    "        nn.init.kaiming_uniform_(self.lora_A, a=np.sqrt(5))\n",
    "    \n",
    "    def forward(self, x):\n",
    "        # 调试：检查设备和数据类型\n",
    "        assert x.device == self.weight.device == self.lora_A.device == self.lora_B.device, \\\n",
    "            f\"Device mismatch: x={x.device}, weight={self.weight.device}, lora_A={self.lora_A.device}, lora_B={self.lora_B.device}\"\n",
    "        assert x.dtype == self.weight.dtype == self.lora_A.dtype == self.lora_B.dtype, \\\n",
    "            f\"Dtype mismatch: x={x.dtype}, weight={self.weight.dtype}, lora_A={self.lora_A.dtype}, lora_B={self.lora_B.dtype}\"\n",
    "        \n",
    "        delta_W = self.lora_B @ self.lora_A * self.alpha / self.rank  # (out_features, in_features)\n",
    "        return nn.functional.linear(x, self.weight + delta_W)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "dbcbcb46-df15-43aa-acd1-40b04e944cc2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Replacing model.layers.0.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.0.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.0.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.0.self_attn.v_proj with LoRA layer\n",
      "Replacing model.layers.1.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.1.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.1.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.1.self_attn.v_proj with LoRA layer\n",
      "Replacing model.layers.2.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.2.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.2.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.2.self_attn.v_proj with LoRA layer\n",
      "Replacing model.layers.3.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.3.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.3.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.3.self_attn.v_proj with LoRA layer\n",
      "Replacing model.layers.4.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.4.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.4.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.4.self_attn.v_proj with LoRA layer\n",
      "Replacing model.layers.5.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.5.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.5.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.5.self_attn.v_proj with LoRA layer\n",
      "Replacing model.layers.6.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.6.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.6.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.6.self_attn.v_proj with LoRA layer\n",
      "Replacing model.layers.7.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.7.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.7.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.7.self_attn.v_proj with LoRA layer\n",
      "Replacing model.layers.8.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.8.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.8.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.8.self_attn.v_proj with LoRA layer\n",
      "Replacing model.layers.9.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.9.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.9.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.9.self_attn.v_proj with LoRA layer\n",
      "Replacing model.layers.10.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.10.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.10.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.10.self_attn.v_proj with LoRA layer\n",
      "Replacing model.layers.11.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.11.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.11.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.11.self_attn.v_proj with LoRA layer\n",
      "Replacing model.layers.12.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.12.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.12.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.12.self_attn.v_proj with LoRA layer\n",
      "Replacing model.layers.13.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.13.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.13.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.13.self_attn.v_proj with LoRA layer\n",
      "Replacing model.layers.14.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.14.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.14.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.14.self_attn.v_proj with LoRA layer\n",
      "Replacing model.layers.15.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.15.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.15.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.15.self_attn.v_proj with LoRA layer\n",
      "Replacing model.layers.16.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.16.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.16.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.16.self_attn.v_proj with LoRA layer\n",
      "Replacing model.layers.17.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.17.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.17.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.17.self_attn.v_proj with LoRA layer\n",
      "Replacing model.layers.18.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.18.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.18.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.18.self_attn.v_proj with LoRA layer\n",
      "Replacing model.layers.19.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.19.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.19.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.19.self_attn.v_proj with LoRA layer\n",
      "Replacing model.layers.20.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.20.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.20.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.20.self_attn.v_proj with LoRA layer\n",
      "Replacing model.layers.21.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.21.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.21.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.21.self_attn.v_proj with LoRA layer\n",
      "Replacing model.layers.22.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.22.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.22.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.22.self_attn.v_proj with LoRA layer\n",
      "Replacing model.layers.23.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.23.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.23.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.23.self_attn.v_proj with LoRA layer\n",
      "Replacing model.layers.24.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.24.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.24.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.24.self_attn.v_proj with LoRA layer\n",
      "Replacing model.layers.25.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.25.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.25.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.25.self_attn.v_proj with LoRA layer\n",
      "Replacing model.layers.26.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.26.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.26.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.26.self_attn.v_proj with LoRA layer\n",
      "Replacing model.layers.27.self_attn.q_proj: in_features=1024, out_features=2048, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([2048, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([2048, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.27.self_attn.q_proj with LoRA layer\n",
      "Replacing model.layers.27.self_attn.v_proj: in_features=1024, out_features=1024, device=cuda:0, dtype=torch.float16\n",
      "  LoRA_A shape: torch.Size([8, 1024]), device: cuda:0, dtype: torch.float16\n",
      "  LoRA_B shape: torch.Size([1024, 8]), device: cuda:0, dtype: torch.float16\n",
      "  Weight shape: torch.Size([1024, 1024]), device: cuda:0, dtype: torch.float16\n",
      "Replaced model.layers.27.self_attn.v_proj with LoRA layer\n",
      "Trainable Parameters: 1,146,880\n",
      "Parameter model.layers.0.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.0.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.0.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.0.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.1.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.1.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.1.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.1.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.2.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.2.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.2.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.2.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.3.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.3.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.3.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.3.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.4.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.4.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.4.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.4.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.5.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.5.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.5.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.5.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.6.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.6.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.6.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.6.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.7.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.7.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.7.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.7.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.8.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.8.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.8.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.8.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.9.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.9.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.9.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.9.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.10.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.10.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.10.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.10.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.11.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.11.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.11.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.11.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.12.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.12.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.12.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.12.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.13.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.13.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.13.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.13.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.14.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.14.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.14.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.14.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.15.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.15.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.15.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.15.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.16.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.16.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.16.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.16.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.17.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.17.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.17.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.17.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.18.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.18.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.18.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.18.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.19.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.19.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.19.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.19.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.20.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.20.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.20.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.20.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.21.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.21.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.21.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.21.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.22.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.22.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.22.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.22.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.23.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.23.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.23.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.23.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.24.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.24.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.24.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.24.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.25.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.25.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.25.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.25.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.26.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.26.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.26.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.26.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.27.self_attn.q_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.27.self_attn.q_proj.lora_B: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.27.self_attn.v_proj.lora_A: device=cuda:0, dtype=torch.float16\n",
      "Parameter model.layers.27.self_attn.v_proj.lora_B: device=cuda:0, dtype=torch.float16\n"
     ]
    }
   ],
   "source": [
    "# Replace q_proj and v_proj linear layers with LoRA layers\n",
    "def replace_linear_with_lora(model, rank, alpha, device=\"cuda\", dtype=torch.float16):\n",
    "    \"\"\"Swap every q_proj / v_proj nn.Linear in `model` for a LoRALinear copy.\n",
    "\n",
    "    The target list is snapshotted before any replacement so the module tree\n",
    "    is never mutated while named_modules() is still iterating, and layers that\n",
    "    are already LoRALinear are skipped so re-running this cell is safe\n",
    "    (no double-wrapping).\n",
    "    \"\"\"\n",
    "    targets = [\n",
    "        (name, module)\n",
    "        for name, module in model.named_modules()\n",
    "        if isinstance(module, nn.Linear)\n",
    "        and not isinstance(module, LoRALinear)\n",
    "        and any(x in name for x in [\"q_proj\", \"v_proj\"])\n",
    "    ]\n",
    "    for name, module in targets:\n",
    "        parent_name = name.rsplit(\".\", 1)[0] if \".\" in name else \"\"\n",
    "        module_name = name.rsplit(\".\", 1)[-1]\n",
    "        parent = model.get_submodule(parent_name) if parent_name else model\n",
    "\n",
    "        # Print dimensions and device for debugging\n",
    "        print(f\"Replacing {name}: in_features={module.in_features}, out_features={module.out_features}, \"\n",
    "              f\"device={module.weight.device}, dtype={module.weight.dtype}\")\n",
    "\n",
    "        # Create the LoRA layer\n",
    "        lora_layer = LoRALinear(\n",
    "            in_features=module.in_features,\n",
    "            out_features=module.out_features,\n",
    "            rank=rank,\n",
    "            alpha=alpha,\n",
    "            device=device,\n",
    "            dtype=dtype\n",
    "        )\n",
    "        # Copy the frozen base weights, keeping device and dtype consistent\n",
    "        lora_layer.weight.data = module.weight.data.clone().to(device=device, dtype=dtype)\n",
    "        if module.bias is not None:\n",
    "            lora_layer.bias = nn.Parameter(module.bias.data.clone().to(device=device, dtype=dtype))\n",
    "\n",
    "        # Sanity-check the LoRA parameters\n",
    "        print(f\"  LoRA_A shape: {lora_layer.lora_A.shape}, device: {lora_layer.lora_A.device}, dtype: {lora_layer.lora_A.dtype}\")\n",
    "        print(f\"  LoRA_B shape: {lora_layer.lora_B.shape}, device: {lora_layer.lora_B.device}, dtype: {lora_layer.lora_B.dtype}\")\n",
    "        print(f\"  Weight shape: {lora_layer.weight.shape}, device: {lora_layer.weight.device}, dtype: {lora_layer.weight.dtype}\")\n",
    "\n",
    "        # Swap the module in its parent\n",
    "        setattr(parent, module_name, lora_layer)\n",
    "        print(f\"Replaced {name} with LoRA layer\")\n",
    "\n",
    "replace_linear_with_lora(model, lora_rank, lora_alpha, device=device, dtype=torch_dtype)\n",
    "\n",
    "# Freeze everything except the LoRA parameters\n",
    "for name, param in model.named_parameters():\n",
    "    if \"lora_\" not in name:\n",
    "        param.requires_grad = False\n",
    "\n",
    "# Count trainable parameters\n",
    "trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)\n",
    "print(f\"Trainable Parameters: {trainable_params:,}\")\n",
    "\n",
    "# NOTE(review): lora_A / lora_B are created in float16. Running AdamW directly\n",
    "# on pure-fp16 parameters is numerically fragile (eps / second-moment underflow)\n",
    "# and is the likely cause of the NaN training losses observed later -- consider\n",
    "# keeping the LoRA parameters in float32 or using torch.amp.GradScaler. TODO confirm.\n",
    "for name, param in model.named_parameters():\n",
    "    if \"lora_\" in name:\n",
    "        print(f\"Parameter {name}: device={param.device}, dtype={param.dtype}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "bd54255d-48f4-4005-a0ee-fe8750fa62c2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/10, Average Loss: nan\n",
      "Epoch 2/10, Average Loss: nan\n",
      "Epoch 3/10, Average Loss: nan\n",
      "Epoch 4/10, Average Loss: nan\n",
      "Epoch 5/10, Average Loss: nan\n",
      "Epoch 6/10, Average Loss: nan\n",
      "Epoch 7/10, Average Loss: nan\n",
      "Epoch 8/10, Average Loss: nan\n",
      "Epoch 9/10, Average Loss: nan\n",
      "Epoch 10/10, Average Loss: nan\n"
     ]
    }
   ],
   "source": [
    "# Optimizer over the trainable (LoRA) parameters only\n",
    "optimizer = torch.optim.AdamW(\n",
    "    [p for p in model.parameters() if p.requires_grad],\n",
    "    lr=learning_rate\n",
    ")\n",
    "\n",
    "# Training loop\n",
    "model.train()\n",
    "for epoch in range(num_epochs):\n",
    "    total_loss = 0.0\n",
    "    num_batches = 0\n",
    "    for batch in dataloader:\n",
    "        input_ids = batch[\"input_ids\"].to(device)\n",
    "        attention_mask = batch[\"attention_mask\"].to(device)\n",
    "        labels = batch[\"labels\"].to(device)\n",
    "\n",
    "        outputs = model(\n",
    "            input_ids=input_ids,\n",
    "            attention_mask=attention_mask,\n",
    "            labels=labels\n",
    "        )\n",
    "\n",
    "        loss = outputs.loss\n",
    "        # Fail fast on a non-finite loss instead of silently averaging NaN\n",
    "        # for every epoch (the previously recorded output shows 'Average\n",
    "        # Loss: nan' for all 10 epochs).\n",
    "        if not torch.isfinite(loss):\n",
    "            raise RuntimeError(\n",
    "                f\"Non-finite loss in epoch {epoch + 1}: {loss.item()}. \"\n",
    "                \"AdamW on pure-fp16 weights typically needs a GradScaler \"\n",
    "                \"or fp32 master weights.\"\n",
    "            )\n",
    "        total_loss += loss.item()\n",
    "        num_batches += 1\n",
    "\n",
    "        optimizer.zero_grad()\n",
    "        loss.backward()\n",
    "        # Clip gradients to stabilize fp16 LoRA training\n",
    "        torch.nn.utils.clip_grad_norm_(\n",
    "            [p for p in model.parameters() if p.requires_grad], max_norm=1.0\n",
    "        )\n",
    "        optimizer.step()\n",
    "\n",
    "    # Guard against an empty dataloader when averaging\n",
    "    avg_loss = total_loss / max(num_batches, 1)\n",
    "    print(f\"Epoch {epoch + 1}/{num_epochs}, Average Loss: {avg_loss:.4f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "4bcafc28-1979-4744-a844-0817ccc05acd",
   "metadata": {},
   "outputs": [
    {
     "ename": "RuntimeError",
     "evalue": "probability tensor contains either `inf`, `nan` or element < 0",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mRuntimeError\u001b[0m                              Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[19], line 7\u001b[0m\n\u001b[1;32m      5\u001b[0m inputs \u001b[38;5;241m=\u001b[39m tokenizer(prompt, return_tensors\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpt\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39mto(device)\n\u001b[1;32m      6\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mno_grad():\n\u001b[0;32m----> 7\u001b[0m     outputs \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m      8\u001b[0m \u001b[43m        \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43minput_ids\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      9\u001b[0m \u001b[43m        \u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mattention_mask\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     10\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmax_length\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m100\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m     11\u001b[0m \u001b[43m        \u001b[49m\u001b[43mdo_sample\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m     12\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtop_p\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.9\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m     13\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtemperature\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.7\u001b[39;49m\n\u001b[1;32m     14\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     16\u001b[0m generated_text 
\u001b[38;5;241m=\u001b[39m tokenizer\u001b[38;5;241m.\u001b[39mdecode(outputs[\u001b[38;5;241m0\u001b[39m], skip_special_tokens\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m     17\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPrompt: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mprompt\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
      "File \u001b[0;32m~/miniconda3/lib/python3.12/site-packages/torch/utils/_contextlib.py:115\u001b[0m, in \u001b[0;36mcontext_decorator.<locals>.decorate_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    112\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m    113\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_context\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m    114\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m ctx_factory():\n\u001b[0;32m--> 115\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/miniconda3/lib/python3.12/site-packages/transformers/generation/utils.py:2465\u001b[0m, in \u001b[0;36mGenerationMixin.generate\u001b[0;34m(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, use_model_defaults, **kwargs)\u001b[0m\n\u001b[1;32m   2457\u001b[0m     input_ids, model_kwargs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_expand_inputs_for_generation(\n\u001b[1;32m   2458\u001b[0m         input_ids\u001b[38;5;241m=\u001b[39minput_ids,\n\u001b[1;32m   2459\u001b[0m         expand_size\u001b[38;5;241m=\u001b[39mgeneration_config\u001b[38;5;241m.\u001b[39mnum_return_sequences,\n\u001b[1;32m   2460\u001b[0m         is_encoder_decoder\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mis_encoder_decoder,\n\u001b[1;32m   2461\u001b[0m         \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mmodel_kwargs,\n\u001b[1;32m   2462\u001b[0m     )\n\u001b[1;32m   2464\u001b[0m     \u001b[38;5;66;03m# 12. 
run sample (it degenerates to greedy search when `generation_config.do_sample=False`)\u001b[39;00m\n\u001b[0;32m-> 2465\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sample\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   2466\u001b[0m \u001b[43m        \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2467\u001b[0m \u001b[43m        \u001b[49m\u001b[43mlogits_processor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprepared_logits_processor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2468\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstopping_criteria\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprepared_stopping_criteria\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2469\u001b[0m \u001b[43m        \u001b[49m\u001b[43mgeneration_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgeneration_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2470\u001b[0m \u001b[43m        \u001b[49m\u001b[43msynced_gpus\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msynced_gpus\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2471\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstreamer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstreamer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2472\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2473\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   2475\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m generation_mode \u001b[38;5;129;01min\u001b[39;00m (GenerationMode\u001b[38;5;241m.\u001b[39mBEAM_SAMPLE, GenerationMode\u001b[38;5;241m.\u001b[39mBEAM_SEARCH):\n\u001b[1;32m   2476\u001b[0m     \u001b[38;5;66;03m# 11. 
interleave input_ids with `num_beams` additional sequences per batch\u001b[39;00m\n\u001b[1;32m   2477\u001b[0m     input_ids, model_kwargs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_expand_inputs_for_generation(\n\u001b[1;32m   2478\u001b[0m         input_ids\u001b[38;5;241m=\u001b[39minput_ids,\n\u001b[1;32m   2479\u001b[0m         expand_size\u001b[38;5;241m=\u001b[39mgeneration_config\u001b[38;5;241m.\u001b[39mnum_beams,\n\u001b[1;32m   2480\u001b[0m         is_encoder_decoder\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mis_encoder_decoder,\n\u001b[1;32m   2481\u001b[0m         \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mmodel_kwargs,\n\u001b[1;32m   2482\u001b[0m     )\n",
      "File \u001b[0;32m~/miniconda3/lib/python3.12/site-packages/transformers/generation/utils.py:3476\u001b[0m, in \u001b[0;36mGenerationMixin._sample\u001b[0;34m(self, input_ids, logits_processor, stopping_criteria, generation_config, synced_gpus, streamer, **model_kwargs)\u001b[0m\n\u001b[1;32m   3474\u001b[0m     probs \u001b[38;5;241m=\u001b[39m nn\u001b[38;5;241m.\u001b[39mfunctional\u001b[38;5;241m.\u001b[39msoftmax(next_token_scores, dim\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m   3475\u001b[0m     \u001b[38;5;66;03m# TODO (joao): this OP throws \"skipping cudagraphs due to ['incompatible ops']\", find solution\u001b[39;00m\n\u001b[0;32m-> 3476\u001b[0m     next_tokens \u001b[38;5;241m=\u001b[39m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmultinomial\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprobs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnum_samples\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39msqueeze(\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m   3477\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   3478\u001b[0m     next_tokens \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39margmax(next_token_scores, dim\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n",
      "\u001b[0;31mRuntimeError\u001b[0m: probability tensor contains either `inf`, `nan` or element < 0"
     ]
    }
   ],
   "source": [
    "# Test the fine-tuned model\n",
    "# NOTE(review): the RuntimeError previously recorded here (`probability tensor\n",
    "# contains either inf, nan or element < 0`) is a symptom of NaN weights coming\n",
    "# out of the failed fp16 training run above, not a bug in generate() itself.\n",
    "prompt = \"Query: 你好你是谁\"\n",
    "\n",
    "model.eval()\n",
    "inputs = tokenizer(prompt, return_tensors=\"pt\").to(device)\n",
    "with torch.no_grad():\n",
    "    outputs = model.generate(\n",
    "        inputs[\"input_ids\"],\n",
    "        attention_mask=inputs[\"attention_mask\"],\n",
    "        # max_new_tokens budgets only the generated tokens; the old\n",
    "        # max_length=100 also counted the prompt, shrinking the output\n",
    "        # as the prompt grows.\n",
    "        max_new_tokens=100,\n",
    "        do_sample=True,\n",
    "        top_p=0.9,\n",
    "        temperature=0.7\n",
    "    )\n",
    "\n",
    "generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
    "print(f\"Prompt: {prompt}\")\n",
    "print(f\"Generated: {generated_text}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9b4d74c1-ba74-4557-b58c-1bef527dcda0",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c7f91260-ea69-489e-826c-76b2110bf883",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5f627c46-d1e8-4b5e-8bd6-36ebc00acb88",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aec9fb8d-5dd3-48aa-8172-c16d637e0662",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cc6a55c4-b70b-46e9-9f66-d2a396c3ee8a",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
